|    522   interpret XY: pair_prob_space "distr M S X" "distr M T Y" .. |    541   interpret XY: pair_prob_space "distr M S X" "distr M T Y" .. | 
|    523   let ?P = "S \<Otimes>\<^sub>M T" |    542   let ?P = "S \<Otimes>\<^sub>M T" | 
|    524   let ?D = "distr M ?P (\<lambda>x. (X x, Y x))" |    543   let ?D = "distr M ?P (\<lambda>x. (X x, Y x))" | 
|    525  |    544  | 
|    526   { fix A assume "A \<in> sets S" |    545   { fix A assume "A \<in> sets S" | 
|    527     with X Y have "emeasure (distr M S X) A = emeasure ?D (A \<times> space T)" |    546     with X[THEN measurable_space] Y[THEN measurable_space] | 
|    528       by (auto simp: emeasure_distr measurable_Pair measurable_space |    547     have "emeasure (distr M S X) A = emeasure ?D (A \<times> space T)" | 
|    529                intro!: arg_cong[where f="emeasure M"]) } |    548       by (auto simp: emeasure_distr intro!: arg_cong[where f="emeasure M"]) } | 
|    530   note marginal_eq1 = this |    549   note marginal_eq1 = this | 
|    531   { fix A assume "A \<in> sets T" |    550   { fix A assume "A \<in> sets T" | 
|    532     with X Y have "emeasure (distr M T Y) A = emeasure ?D (space S \<times> A)" |    551     with X[THEN measurable_space] Y[THEN measurable_space] | 
|    533       by (auto simp: emeasure_distr measurable_Pair measurable_space |    552     have "emeasure (distr M T Y) A = emeasure ?D (space S \<times> A)" | 
|    534                intro!: arg_cong[where f="emeasure M"]) } |    553       by (auto simp: emeasure_distr intro!: arg_cong[where f="emeasure M"]) } | 
|    535   note marginal_eq2 = this |    554   note marginal_eq2 = this | 
|    536  |    555  | 
|    537   have eq: "(\<lambda>x. ereal (Px (fst x) * Py (snd x))) = (\<lambda>(x, y). ereal (Px x) * ereal (Py y))" |    556   have distr_eq: "distr M S X \<Otimes>\<^sub>M distr M T Y = density ?P (\<lambda>x. ennreal (Px (fst x) * Py (snd x)))" | 
|    538     by auto |    557     unfolding Px(1)[THEN distributed_distr_eq_density] Py(1)[THEN distributed_distr_eq_density] | 
|    539  |         | 
|    540   have distr_eq: "distr M S X \<Otimes>\<^sub>M distr M T Y = density ?P (\<lambda>x. ereal (Px (fst x) * Py (snd x)))" |         | 
|    541     unfolding Px(1)[THEN distributed_distr_eq_density] Py(1)[THEN distributed_distr_eq_density] eq |         | 
|    542   proof (subst pair_measure_density) |    558   proof (subst pair_measure_density) | 
|    543     show "(\<lambda>x. ereal (Px x)) \<in> borel_measurable S" "(\<lambda>y. ereal (Py y)) \<in> borel_measurable T" |    559     show "(\<lambda>x. ennreal (Px x)) \<in> borel_measurable S" "(\<lambda>y. ennreal (Py y)) \<in> borel_measurable T" | 
|    544       "AE x in S. 0 \<le> ereal (Px x)" "AE y in T. 0 \<le> ereal (Py y)" |         | 
|    545       using Px Py by (auto simp: distributed_def) |    560       using Px Py by (auto simp: distributed_def) | 
|    546     show "sigma_finite_measure (density T Py)" unfolding Py(1)[THEN distributed_distr_eq_density, symmetric] .. |    561     show "sigma_finite_measure (density T Py)" unfolding Py(1)[THEN distributed_distr_eq_density, symmetric] .. | 
|    547   qed (fact | simp)+ |    562     show "density (S \<Otimes>\<^sub>M T) (\<lambda>(x, y). ennreal (Px x) * ennreal (Py y)) = | 
|    548    |    563       density (S \<Otimes>\<^sub>M T) (\<lambda>x. ennreal (Px (fst x) * Py (snd x)))" | 
|    549   have M: "?M = KL_divergence b (density ?P (\<lambda>x. ereal (Px (fst x) * Py (snd x)))) (density ?P (\<lambda>x. ereal (Pxy x)))" |    564       using Px_nn Py_nn by (auto intro!: density_cong simp: distributed_def ennreal_mult space_pair_measure) | 
|         |    565   qed fact | 
|         |    566  | 
|         |    567   have M: "?M = KL_divergence b (density ?P (\<lambda>x. ennreal (Px (fst x) * Py (snd x)))) (density ?P (\<lambda>x. ennreal (Pxy x)))" | 
|    550     unfolding mutual_information_def distr_eq Pxy(1)[THEN distributed_distr_eq_density] .. |    568     unfolding mutual_information_def distr_eq Pxy(1)[THEN distributed_distr_eq_density] .. | 
|    551  |    569  | 
|    552   from Px Py have f: "(\<lambda>x. Px (fst x) * Py (snd x)) \<in> borel_measurable ?P" |    570   from Px Py have f: "(\<lambda>x. Px (fst x) * Py (snd x)) \<in> borel_measurable ?P" | 
|    553     by (intro borel_measurable_times) (auto intro: distributed_real_measurable measurable_fst'' measurable_snd'') |    571     by (intro borel_measurable_times) (auto intro: distributed_real_measurable measurable_fst'' measurable_snd'') | 
|    554   have PxPy_nonneg: "AE x in ?P. 0 \<le> Px (fst x) * Py (snd x)" |    572   have PxPy_nonneg: "AE x in ?P. 0 \<le> Px (fst x) * Py (snd x)" | 
|    555   proof (rule ST.AE_pair_measure) |    573     using Px_nn Py_nn by (auto simp: space_pair_measure) | 
|    556     show "{x \<in> space ?P. 0 \<le> Px (fst x) * Py (snd x)} \<in> sets ?P" |    574  | 
|    557       using f by auto |    575   have A: "(AE x in ?P. Px (fst x) = 0 \<longrightarrow> Pxy x = 0)" | 
|    558     show "AE x in S. AE y in T. 0 \<le> Px (fst (x, y)) * Py (snd (x, y))" |    576     by (rule subdensity_real[OF measurable_fst Pxy Px]) (insert Px_nn Pxy_nn, auto simp: space_pair_measure) | 
|    559       using Px Py by (auto simp: zero_le_mult_iff dest!: distributed_real_AE) |         | 
|    560   qed |         | 
|    561  |         | 
|    562   have "(AE x in ?P. Px (fst x) = 0 \<longrightarrow> Pxy x = 0)" |         | 
|    563     by (rule subdensity_real[OF measurable_fst Pxy Px]) auto |         | 
|    564   moreover |    577   moreover | 
|    565   have "(AE x in ?P. Py (snd x) = 0 \<longrightarrow> Pxy x = 0)" |    578   have B: "(AE x in ?P. Py (snd x) = 0 \<longrightarrow> Pxy x = 0)" | 
|    566     by (rule subdensity_real[OF measurable_snd Pxy Py]) auto |    579     by (rule subdensity_real[OF measurable_snd Pxy Py]) (insert Py_nn Pxy_nn, auto simp: space_pair_measure) | 
|    567   ultimately have ac: "AE x in ?P. Px (fst x) * Py (snd x) = 0 \<longrightarrow> Pxy x = 0" |    580   ultimately have ac: "AE x in ?P. Px (fst x) * Py (snd x) = 0 \<longrightarrow> Pxy x = 0" | 
|    568     by eventually_elim auto |    581     by eventually_elim auto | 
|    569  |    582  | 
|    570   show "?M = ?R" |    583   show "?M = ?R" | 
|    571     unfolding M f_def |    584     unfolding M f_def using Pxy_nn Px_nn Py_nn | 
|    572     using b_gt_1 f PxPy_nonneg Pxy[THEN distributed_real_measurable] Pxy[THEN distributed_real_AE] ac |    585     by (intro ST.KL_density_density b_gt_1 f PxPy_nonneg ac) (auto simp: space_pair_measure) | 
|    573     by (rule ST.KL_density_density) |         | 
|    574  |    586  | 
|    575   have X: "X = fst \<circ> (\<lambda>x. (X x, Y x))" and Y: "Y = snd \<circ> (\<lambda>x. (X x, Y x))" |    587   have X: "X = fst \<circ> (\<lambda>x. (X x, Y x))" and Y: "Y = snd \<circ> (\<lambda>x. (X x, Y x))" | 
|    576     by auto |    588     by auto | 
|    577  |    589  | 
|    578   have "integrable (S \<Otimes>\<^sub>M T) (\<lambda>x. Pxy x * log b (Pxy x) - Pxy x * log b (Px (fst x)) - Pxy x * log b (Py (snd x)))" |    590   have "integrable (S \<Otimes>\<^sub>M T) (\<lambda>x. Pxy x * log b (Pxy x) - Pxy x * log b (Px (fst x)) - Pxy x * log b (Py (snd x)))" | 
|    579     using finite_entropy_integrable[OF Fxy] |    591     using finite_entropy_integrable[OF Fxy] | 
|    580     using finite_entropy_integrable_transform[OF Fx Pxy, of fst] |    592     using finite_entropy_integrable_transform[OF Fx Pxy, of fst] | 
|    581     using finite_entropy_integrable_transform[OF Fy Pxy, of snd] |    593     using finite_entropy_integrable_transform[OF Fy Pxy, of snd] | 
|    582     by simp |    594     by (simp add: Pxy_nn) | 
|    583   moreover have "f \<in> borel_measurable (S \<Otimes>\<^sub>M T)" |    595   moreover have "f \<in> borel_measurable (S \<Otimes>\<^sub>M T)" | 
|    584     unfolding f_def using Px Py Pxy |    596     unfolding f_def using Px Py Pxy | 
|    585     by (auto intro: distributed_real_measurable measurable_fst'' measurable_snd'' |    597     by (auto intro: distributed_real_measurable measurable_fst'' measurable_snd'' | 
|    586       intro!: borel_measurable_times borel_measurable_log borel_measurable_divide) |    598       intro!: borel_measurable_times borel_measurable_log borel_measurable_divide) | 
|    587   ultimately have int: "integrable (S \<Otimes>\<^sub>M T) f" |    599   ultimately have int: "integrable (S \<Otimes>\<^sub>M T) f" | 
|    588     apply (rule integrable_cong_AE_imp) |    600     apply (rule integrable_cong_AE_imp) | 
|    589     using |    601     using A B AE_space | 
|    590       distributed_transform_AE[OF measurable_fst ac_fst, of T, OF T Px] |         | 
|    591       distributed_transform_AE[OF measurable_snd ac_snd, of _ _ _ _ S, OF T Py] |         | 
|    592       subdensity_real[OF measurable_fst Pxy Px X] |         | 
|    593       subdensity_real[OF measurable_snd Pxy Py Y] |         | 
|    594       distributed_real_AE[OF Pxy] |         | 
|    595     by eventually_elim |    602     by eventually_elim | 
|    596        (auto simp: f_def log_divide_eq log_mult_eq field_simps zero_less_mult_iff) |    603        (auto simp: f_def log_divide_eq log_mult_eq field_simps space_pair_measure Px_nn Py_nn Pxy_nn | 
|         |    604                   less_le) | 
|    597  |    605  | 
|    598   show "0 \<le> ?M" unfolding M |    606   show "0 \<le> ?M" unfolding M | 
|    599   proof (rule ST.KL_density_density_nonneg |    607   proof (intro ST.KL_density_density_nonneg) | 
|    600     [OF b_gt_1 f PxPy_nonneg _ Pxy[THEN distributed_real_measurable] Pxy[THEN distributed_real_AE] _ ac int[unfolded f_def]]) |    608     show "prob_space (density (S \<Otimes>\<^sub>M T) (\<lambda>x. ennreal (Pxy x))) " | 
|    601     show "prob_space (density (S \<Otimes>\<^sub>M T) (\<lambda>x. ereal (Pxy x))) " |         | 
|    602       unfolding distributed_distr_eq_density[OF Pxy, symmetric] |    609       unfolding distributed_distr_eq_density[OF Pxy, symmetric] | 
|    603       using distributed_measurable[OF Pxy] by (rule prob_space_distr) |    610       using distributed_measurable[OF Pxy] by (rule prob_space_distr) | 
|    604     show "prob_space (density (S \<Otimes>\<^sub>M T) (\<lambda>x. ereal (Px (fst x) * Py (snd x))))" |    611     show "prob_space (density (S \<Otimes>\<^sub>M T) (\<lambda>x. ennreal (Px (fst x) * Py (snd x))))" | 
|    605       unfolding distr_eq[symmetric] by unfold_locales |    612       unfolding distr_eq[symmetric] by unfold_locales | 
|    606   qed |    613     show "integrable (S \<Otimes>\<^sub>M T) (\<lambda>x. Pxy x * log b (Pxy x / (Px (fst x) * Py (snd x))))" | 
|    607 qed |    614       using int unfolding f_def . | 
|    608  |    615   qed (insert ac, auto simp: b_gt_1 Px_nn Py_nn Pxy_nn space_pair_measure) | 
|         |    616 qed | 
|    609  |    617  | 
|    610 lemma (in information_space) |    618 lemma (in information_space) | 
|    611   fixes Pxy :: "'b \<times> 'c \<Rightarrow> real" and Px :: "'b \<Rightarrow> real" and Py :: "'c \<Rightarrow> real" |    619   fixes Pxy :: "'b \<times> 'c \<Rightarrow> real" and Px :: "'b \<Rightarrow> real" and Py :: "'c \<Rightarrow> real" | 
|    612   assumes "sigma_finite_measure S" "sigma_finite_measure T" |    620   assumes "sigma_finite_measure S" "sigma_finite_measure T" | 
|    613   assumes Px: "distributed M S X Px" and Py: "distributed M T Y Py" |    621   assumes Px: "distributed M S X Px" and Px_nn: "\<And>x. x \<in> space S \<Longrightarrow> 0 \<le> Px x" | 
|    614   assumes Pxy: "distributed M (S \<Otimes>\<^sub>M T) (\<lambda>x. (X x, Y x)) Pxy" |    622     and Py: "distributed M T Y Py" and Py_nn: "\<And>y. y \<in> space T \<Longrightarrow> 0 \<le> Py y" | 
|         |    623     and Pxy: "distributed M (S \<Otimes>\<^sub>M T) (\<lambda>x. (X x, Y x)) Pxy" | 
|         |    624     and Pxy_nn: "\<And>x y. x \<in> space S \<Longrightarrow> y \<in> space T \<Longrightarrow> 0 \<le> Pxy (x, y)" | 
|    615   defines "f \<equiv> \<lambda>x. Pxy x * log b (Pxy x / (Px (fst x) * Py (snd x)))" |    625   defines "f \<equiv> \<lambda>x. Pxy x * log b (Pxy x / (Px (fst x) * Py (snd x)))" | 
|    616   shows mutual_information_distr: "mutual_information b S T X Y = integral\<^sup>L (S \<Otimes>\<^sub>M T) f" (is "?M = ?R") |    626   shows mutual_information_distr: "mutual_information b S T X Y = integral\<^sup>L (S \<Otimes>\<^sub>M T) f" (is "?M = ?R") | 
|    617     and mutual_information_nonneg: "integrable (S \<Otimes>\<^sub>M T) f \<Longrightarrow> 0 \<le> mutual_information b S T X Y" |    627     and mutual_information_nonneg: "integrable (S \<Otimes>\<^sub>M T) f \<Longrightarrow> 0 \<le> mutual_information b S T X Y" | 
|    618 proof - |    628 proof - | 
|    619   have X: "random_variable S X" |    629   have X[measurable]: "random_variable S X" | 
|    620     using Px by (auto simp: distributed_def) |    630     using Px by (auto simp: distributed_def) | 
|    621   have Y: "random_variable T Y" |    631   have Y[measurable]: "random_variable T Y" | 
|    622     using Py by (auto simp: distributed_def) |    632     using Py by (auto simp: distributed_def) | 
|         |    633   have [measurable]: "Px \<in> S \<rightarrow>\<^sub>M borel" | 
|         |    634     using Px Px_nn by (intro distributed_real_measurable) | 
|         |    635   have [measurable]: "Py \<in> T \<rightarrow>\<^sub>M borel" | 
|         |    636     using Py Py_nn by (intro distributed_real_measurable) | 
|         |    637   have measurable_Pxy[measurable]: "Pxy \<in> (S \<Otimes>\<^sub>M T) \<rightarrow>\<^sub>M borel" | 
|         |    638     using Pxy Pxy_nn by (intro distributed_real_measurable) (auto simp: space_pair_measure) | 
|         |    639  | 
|    623   interpret S: sigma_finite_measure S by fact |    640   interpret S: sigma_finite_measure S by fact | 
|    624   interpret T: sigma_finite_measure T by fact |    641   interpret T: sigma_finite_measure T by fact | 
|    625   interpret ST: pair_sigma_finite S T .. |    642   interpret ST: pair_sigma_finite S T .. | 
|    626   interpret X: prob_space "distr M S X" using X by (rule prob_space_distr) |    643   interpret X: prob_space "distr M S X" using X by (rule prob_space_distr) | 
|    627   interpret Y: prob_space "distr M T Y" using Y by (rule prob_space_distr) |    644   interpret Y: prob_space "distr M T Y" using Y by (rule prob_space_distr) | 
|    628   interpret XY: pair_prob_space "distr M S X" "distr M T Y" .. |    645   interpret XY: pair_prob_space "distr M S X" "distr M T Y" .. | 
|    629   let ?P = "S \<Otimes>\<^sub>M T" |    646   let ?P = "S \<Otimes>\<^sub>M T" | 
|    630   let ?D = "distr M ?P (\<lambda>x. (X x, Y x))" |    647   let ?D = "distr M ?P (\<lambda>x. (X x, Y x))" | 
|    631  |    648  | 
|    632   { fix A assume "A \<in> sets S" |    649   { fix A assume "A \<in> sets S" | 
|    633     with X Y have "emeasure (distr M S X) A = emeasure ?D (A \<times> space T)" |    650     with X[THEN measurable_space] Y[THEN measurable_space] | 
|    634       by (auto simp: emeasure_distr measurable_Pair measurable_space |    651     have "emeasure (distr M S X) A = emeasure ?D (A \<times> space T)" | 
|    635                intro!: arg_cong[where f="emeasure M"]) } |    652       by (auto simp: emeasure_distr intro!: arg_cong[where f="emeasure M"]) } | 
|    636   note marginal_eq1 = this |    653   note marginal_eq1 = this | 
|    637   { fix A assume "A \<in> sets T" |    654   { fix A assume "A \<in> sets T" | 
|    638     with X Y have "emeasure (distr M T Y) A = emeasure ?D (space S \<times> A)" |    655     with X[THEN measurable_space] Y[THEN measurable_space] | 
|    639       by (auto simp: emeasure_distr measurable_Pair measurable_space |    656     have "emeasure (distr M T Y) A = emeasure ?D (space S \<times> A)" | 
|    640                intro!: arg_cong[where f="emeasure M"]) } |    657       by (auto simp: emeasure_distr intro!: arg_cong[where f="emeasure M"]) } | 
|    641   note marginal_eq2 = this |    658   note marginal_eq2 = this | 
|    642  |    659  | 
|    643   have eq: "(\<lambda>x. ereal (Px (fst x) * Py (snd x))) = (\<lambda>(x, y). ereal (Px x) * ereal (Py y))" |    660   have distr_eq: "distr M S X \<Otimes>\<^sub>M distr M T Y = density ?P (\<lambda>x. ennreal (Px (fst x) * Py (snd x)))" | 
|    644     by auto |    661     unfolding Px(1)[THEN distributed_distr_eq_density] Py(1)[THEN distributed_distr_eq_density] | 
|    645  |         | 
|    646   have distr_eq: "distr M S X \<Otimes>\<^sub>M distr M T Y = density ?P (\<lambda>x. ereal (Px (fst x) * Py (snd x)))" |         | 
|    647     unfolding Px(1)[THEN distributed_distr_eq_density] Py(1)[THEN distributed_distr_eq_density] eq |         | 
|    648   proof (subst pair_measure_density) |    662   proof (subst pair_measure_density) | 
|    649     show "(\<lambda>x. ereal (Px x)) \<in> borel_measurable S" "(\<lambda>y. ereal (Py y)) \<in> borel_measurable T" |    663     show "(\<lambda>x. ennreal (Px x)) \<in> borel_measurable S" "(\<lambda>y. ennreal (Py y)) \<in> borel_measurable T" | 
|    650       "AE x in S. 0 \<le> ereal (Px x)" "AE y in T. 0 \<le> ereal (Py y)" |         | 
|    651       using Px Py by (auto simp: distributed_def) |    664       using Px Py by (auto simp: distributed_def) | 
|    652     show "sigma_finite_measure (density T Py)" unfolding Py(1)[THEN distributed_distr_eq_density, symmetric] .. |    665     show "sigma_finite_measure (density T Py)" unfolding Py(1)[THEN distributed_distr_eq_density, symmetric] .. | 
|    653   qed (fact | simp)+ |    666     show "density (S \<Otimes>\<^sub>M T) (\<lambda>(x, y). ennreal (Px x) * ennreal (Py y)) = | 
|    654    |    667       density (S \<Otimes>\<^sub>M T) (\<lambda>x. ennreal (Px (fst x) * Py (snd x)))" | 
|    655   have M: "?M = KL_divergence b (density ?P (\<lambda>x. ereal (Px (fst x) * Py (snd x)))) (density ?P (\<lambda>x. ereal (Pxy x)))" |    668       using Px_nn Py_nn by (auto intro!: density_cong simp: distributed_def ennreal_mult space_pair_measure) | 
|         |    669   qed fact | 
|         |    670  | 
|         |    671   have M: "?M = KL_divergence b (density ?P (\<lambda>x. ennreal (Px (fst x) * Py (snd x)))) (density ?P (\<lambda>x. ennreal (Pxy x)))" | 
|    656     unfolding mutual_information_def distr_eq Pxy(1)[THEN distributed_distr_eq_density] .. |    672     unfolding mutual_information_def distr_eq Pxy(1)[THEN distributed_distr_eq_density] .. | 
|    657  |    673  | 
|    658   from Px Py have f: "(\<lambda>x. Px (fst x) * Py (snd x)) \<in> borel_measurable ?P" |    674   from Px Py have f: "(\<lambda>x. Px (fst x) * Py (snd x)) \<in> borel_measurable ?P" | 
|    659     by (intro borel_measurable_times) (auto intro: distributed_real_measurable measurable_fst'' measurable_snd'') |    675     by (intro borel_measurable_times) (auto intro: distributed_real_measurable measurable_fst'' measurable_snd'') | 
|    660   have PxPy_nonneg: "AE x in ?P. 0 \<le> Px (fst x) * Py (snd x)" |    676   have PxPy_nonneg: "AE x in ?P. 0 \<le> Px (fst x) * Py (snd x)" | 
|    661   proof (rule ST.AE_pair_measure) |    677     using Px_nn Py_nn by (auto simp: space_pair_measure) | 
|    662     show "{x \<in> space ?P. 0 \<le> Px (fst x) * Py (snd x)} \<in> sets ?P" |         | 
|    663       using f by auto |         | 
|    664     show "AE x in S. AE y in T. 0 \<le> Px (fst (x, y)) * Py (snd (x, y))" |         | 
|    665       using Px Py by (auto simp: zero_le_mult_iff dest!: distributed_real_AE) |         | 
|    666   qed |         | 
|    667  |    678  | 
|    668   have "(AE x in ?P. Px (fst x) = 0 \<longrightarrow> Pxy x = 0)" |    679   have "(AE x in ?P. Px (fst x) = 0 \<longrightarrow> Pxy x = 0)" | 
|    669     by (rule subdensity_real[OF measurable_fst Pxy Px]) auto |    680     by (rule subdensity_real[OF measurable_fst Pxy Px]) (insert Px_nn Pxy_nn, auto simp: space_pair_measure) | 
|    670   moreover |    681   moreover | 
|    671   have "(AE x in ?P. Py (snd x) = 0 \<longrightarrow> Pxy x = 0)" |    682   have "(AE x in ?P. Py (snd x) = 0 \<longrightarrow> Pxy x = 0)" | 
|    672     by (rule subdensity_real[OF measurable_snd Pxy Py]) auto |    683     by (rule subdensity_real[OF measurable_snd Pxy Py]) (insert Py_nn Pxy_nn, auto simp: space_pair_measure) | 
|    673   ultimately have ac: "AE x in ?P. Px (fst x) * Py (snd x) = 0 \<longrightarrow> Pxy x = 0" |    684   ultimately have ac: "AE x in ?P. Px (fst x) * Py (snd x) = 0 \<longrightarrow> Pxy x = 0" | 
|    674     by eventually_elim auto |    685     by eventually_elim auto | 
|    675  |    686  | 
|    676   show "?M = ?R" |    687   show "?M = ?R" | 
|    677     unfolding M f_def |    688     unfolding M f_def | 
|    678     using b_gt_1 f PxPy_nonneg Pxy[THEN distributed_real_measurable] Pxy[THEN distributed_real_AE] ac |    689     using b_gt_1 f PxPy_nonneg ac Pxy_nn | 
|    679     by (rule ST.KL_density_density) |    690     by (intro ST.KL_density_density) (auto simp: space_pair_measure) | 
|    680  |    691  | 
|    681   assume int: "integrable (S \<Otimes>\<^sub>M T) f" |    692   assume int: "integrable (S \<Otimes>\<^sub>M T) f" | 
|    682   show "0 \<le> ?M" unfolding M |    693   show "0 \<le> ?M" unfolding M | 
|    683   proof (rule ST.KL_density_density_nonneg |    694   proof (intro ST.KL_density_density_nonneg) | 
|    684     [OF b_gt_1 f PxPy_nonneg _ Pxy[THEN distributed_real_measurable] Pxy[THEN distributed_real_AE] _ ac int[unfolded f_def]]) |    695     show "prob_space (density (S \<Otimes>\<^sub>M T) (\<lambda>x. ennreal (Pxy x))) " | 
|    685     show "prob_space (density (S \<Otimes>\<^sub>M T) (\<lambda>x. ereal (Pxy x))) " |         | 
|    686       unfolding distributed_distr_eq_density[OF Pxy, symmetric] |    696       unfolding distributed_distr_eq_density[OF Pxy, symmetric] | 
|    687       using distributed_measurable[OF Pxy] by (rule prob_space_distr) |    697       using distributed_measurable[OF Pxy] by (rule prob_space_distr) | 
|    688     show "prob_space (density (S \<Otimes>\<^sub>M T) (\<lambda>x. ereal (Px (fst x) * Py (snd x))))" |    698     show "prob_space (density (S \<Otimes>\<^sub>M T) (\<lambda>x. ennreal (Px (fst x) * Py (snd x))))" | 
|    689       unfolding distr_eq[symmetric] by unfold_locales |    699       unfolding distr_eq[symmetric] by unfold_locales | 
|    690   qed |    700     show "integrable (S \<Otimes>\<^sub>M T) (\<lambda>x. Pxy x * log b (Pxy x / (Px (fst x) * Py (snd x))))" | 
|         |    701       using int unfolding f_def . | 
|         |    702   qed (insert ac, auto simp: b_gt_1 Px_nn Py_nn Pxy_nn space_pair_measure) | 
|    691 qed |    703 qed | 
|    692  |    704  | 
|    693 lemma (in information_space) |    705 lemma (in information_space) | 
|    694   fixes Pxy :: "'b \<times> 'c \<Rightarrow> real" and Px :: "'b \<Rightarrow> real" and Py :: "'c \<Rightarrow> real" |    706   fixes Pxy :: "'b \<times> 'c \<Rightarrow> real" and Px :: "'b \<Rightarrow> real" and Py :: "'c \<Rightarrow> real" | 
|    695   assumes "sigma_finite_measure S" "sigma_finite_measure T" |    707   assumes "sigma_finite_measure S" "sigma_finite_measure T" | 
|    696   assumes Px[measurable]: "distributed M S X Px" and Py[measurable]: "distributed M T Y Py" |    708   assumes Px[measurable]: "distributed M S X Px" and Px_nn: "\<And>x. x \<in> space S \<Longrightarrow> 0 \<le> Px x" | 
|    697   assumes Pxy[measurable]: "distributed M (S \<Otimes>\<^sub>M T) (\<lambda>x. (X x, Y x)) Pxy" |    709     and Py[measurable]: "distributed M T Y Py" and Py_nn:  "\<And>x. x \<in> space T \<Longrightarrow> 0 \<le> Py x" | 
|         |    710     and Pxy[measurable]: "distributed M (S \<Otimes>\<^sub>M T) (\<lambda>x. (X x, Y x)) Pxy" | 
|         |    711     and Pxy_nn: "\<And>x. x \<in> space (S \<Otimes>\<^sub>M T) \<Longrightarrow> 0 \<le> Pxy x" | 
|    698   assumes ae: "AE x in S. AE y in T. Pxy (x, y) = Px x * Py y" |    712   assumes ae: "AE x in S. AE y in T. Pxy (x, y) = Px x * Py y" | 
|    699   shows mutual_information_eq_0: "mutual_information b S T X Y = 0" |    713   shows mutual_information_eq_0: "mutual_information b S T X Y = 0" | 
|    700 proof - |    714 proof - | 
|    701   interpret S: sigma_finite_measure S by fact |    715   interpret S: sigma_finite_measure S by fact | 
|    702   interpret T: sigma_finite_measure T by fact |    716   interpret T: sigma_finite_measure T by fact | 
|    703   interpret ST: pair_sigma_finite S T .. |    717   interpret ST: pair_sigma_finite S T .. | 
|         |    718   note | 
|         |    719     distributed_real_measurable[OF Px_nn Px, measurable] | 
|         |    720     distributed_real_measurable[OF Py_nn Py, measurable] | 
|         |    721     distributed_real_measurable[OF Pxy_nn Pxy, measurable] | 
|    704  |    722  | 
|    705   have "AE x in S \<Otimes>\<^sub>M T. Px (fst x) = 0 \<longrightarrow> Pxy x = 0" |    723   have "AE x in S \<Otimes>\<^sub>M T. Px (fst x) = 0 \<longrightarrow> Pxy x = 0" | 
|    706     by (rule subdensity_real[OF measurable_fst Pxy Px]) auto |    724     by (rule subdensity_real[OF measurable_fst Pxy Px]) (auto simp: Px_nn Pxy_nn space_pair_measure) | 
|    707   moreover |    725   moreover | 
|    708   have "AE x in S \<Otimes>\<^sub>M T. Py (snd x) = 0 \<longrightarrow> Pxy x = 0" |    726   have "AE x in S \<Otimes>\<^sub>M T. Py (snd x) = 0 \<longrightarrow> Pxy x = 0" | 
|    709     by (rule subdensity_real[OF measurable_snd Pxy Py]) auto |    727     by (rule subdensity_real[OF measurable_snd Pxy Py]) (auto simp: Py_nn Pxy_nn space_pair_measure) | 
|    710   moreover  |    728   moreover | 
|    711   have "AE x in S \<Otimes>\<^sub>M T. Pxy x = Px (fst x) * Py (snd x)" |    729   have "AE x in S \<Otimes>\<^sub>M T. Pxy x = Px (fst x) * Py (snd x)" | 
|    712     using distributed_real_measurable[OF Px] distributed_real_measurable[OF Py] distributed_real_measurable[OF Pxy] |         | 
|    713     by (intro ST.AE_pair_measure) (auto simp: ae intro!: measurable_snd'' measurable_fst'') |    730     by (intro ST.AE_pair_measure) (auto simp: ae intro!: measurable_snd'' measurable_fst'') | 
|    714   ultimately have "AE x in S \<Otimes>\<^sub>M T. Pxy x * log b (Pxy x / (Px (fst x) * Py (snd x))) = 0" |    731   ultimately have "AE x in S \<Otimes>\<^sub>M T. Pxy x * log b (Pxy x / (Px (fst x) * Py (snd x))) = 0" | 
|    715     by eventually_elim simp |    732     by eventually_elim simp | 
|    716   then have "(\<integral>x. Pxy x * log b (Pxy x / (Px (fst x) * Py (snd x))) \<partial>(S \<Otimes>\<^sub>M T)) = (\<integral>x. 0 \<partial>(S \<Otimes>\<^sub>M T))" |    733   then have "(\<integral>x. Pxy x * log b (Pxy x / (Px (fst x) * Py (snd x))) \<partial>(S \<Otimes>\<^sub>M T)) = (\<integral>x. 0 \<partial>(S \<Otimes>\<^sub>M T))" | 
|    717     by (intro integral_cong_AE) auto |    734     by (intro integral_cong_AE) auto | 
|    718   then show ?thesis |    735   then show ?thesis | 
|    719     by (subst mutual_information_distr[OF assms(1-5)]) simp |    736     by (subst mutual_information_distr[OF assms(1-8)]) (auto simp add: space_pair_measure) | 
|    720 qed |    737 qed | 
|    721  |    738  | 
|    722 lemma (in information_space) mutual_information_simple_distributed: |    739 lemma (in information_space) mutual_information_simple_distributed: | 
|    723   assumes X: "simple_distributed M X Px" and Y: "simple_distributed M Y Py" |    740   assumes X: "simple_distributed M X Px" and Y: "simple_distributed M Y Py" | 
|    724   assumes XY: "simple_distributed M (\<lambda>x. (X x, Y x)) Pxy" |    741   assumes XY: "simple_distributed M (\<lambda>x. (X x, Y x)) Pxy" | 
|    725   shows "\<I>(X ; Y) = (\<Sum>(x, y)\<in>(\<lambda>x. (X x, Y x))`space M. Pxy (x, y) * log b (Pxy (x, y) / (Px x * Py y)))" |    742   shows "\<I>(X ; Y) = (\<Sum>(x, y)\<in>(\<lambda>x. (X x, Y x))`space M. Pxy (x, y) * log b (Pxy (x, y) / (Px x * Py y)))" | 
|    726 proof (subst mutual_information_distr[OF _ _ simple_distributed[OF X] simple_distributed[OF Y] simple_distributed_joint[OF XY]]) |    743 proof (subst mutual_information_distr[OF _ _ simple_distributed[OF X] _ simple_distributed[OF Y] _ simple_distributed_joint[OF XY]]) | 
|    727   note fin = simple_distributed_joint_finite[OF XY, simp] |    744   note fin = simple_distributed_joint_finite[OF XY, simp] | 
|    728   show "sigma_finite_measure (count_space (X ` space M))" |    745   show "sigma_finite_measure (count_space (X ` space M))" | 
|    729     by (simp add: sigma_finite_measure_count_space_finite) |    746     by (simp add: sigma_finite_measure_count_space_finite) | 
|    730   show "sigma_finite_measure (count_space (Y ` space M))" |    747   show "sigma_finite_measure (count_space (Y ` space M))" | 
|    731     by (simp add: sigma_finite_measure_count_space_finite) |    748     by (simp add: sigma_finite_measure_count_space_finite) | 
|    797     done |    815     done | 
|    798  |    816  | 
|    799   have int_eq: "(\<integral> x. f x * log b (f x) \<partial>MX) = (\<integral> x. log b (f x) \<partial>distr M MX X)" |    817   have int_eq: "(\<integral> x. f x * log b (f x) \<partial>MX) = (\<integral> x. log b (f x) \<partial>distr M MX X)" | 
|    800     unfolding distributed_distr_eq_density[OF X] |    818     unfolding distributed_distr_eq_density[OF X] | 
|    801     using D |    819     using D | 
|    802     by (subst integral_density) |    820     by (subst integral_density) (auto simp: nn) | 
|    803        (auto simp: borel_measurable_ereal_iff) |         | 
|    804  |    821  | 
|    805   show ?eq |    822   show ?eq | 
|    806     unfolding entropy_def KL_divergence_def entropy_density_def comp_def int_eq neg_equal_iff_equal |    823     unfolding entropy_def KL_divergence_def entropy_density_def comp_def int_eq neg_equal_iff_equal | 
|    807     using ae_eq by (intro integral_cong_AE) auto |    824     using ae_eq by (intro integral_cong_AE) (auto simp: nn) | 
|    808 qed |    825 qed | 
|    809  |    826  | 
|    810 lemma (in prob_space) distributed_imp_emeasure_nonzero: |    827 lemma (in information_space) entropy_le: | 
|    811   assumes X: "distributed M MX X Px" |    828   fixes Px :: "'b \<Rightarrow> real" and MX :: "'b measure" | 
|    812   shows "emeasure MX {x \<in> space MX. Px x \<noteq> 0} \<noteq> 0" |    829   assumes X[measurable]: "distributed M MX X Px" and Px_nn[simp]: "\<And>x. x \<in> space MX \<Longrightarrow> 0 \<le> Px x" | 
|    813 proof |    830   and fin: "emeasure MX {x \<in> space MX. Px x \<noteq> 0} \<noteq> top" | 
|    814   note Px = distributed_borel_measurable[OF X] distributed_AE[OF X] |    831   and int: "integrable MX (\<lambda>x. - Px x * log b (Px x))" | 
|         |    832   shows "entropy b MX X \<le> log b (measure MX {x \<in> space MX. Px x \<noteq> 0})" | 
|         |    833 proof - | 
|         |    834   note Px = distributed_borel_measurable[OF X] | 
|    815   interpret X: prob_space "distr M MX X" |    835   interpret X: prob_space "distr M MX X" | 
|    816     using distributed_measurable[OF X] by (rule prob_space_distr) |    836     using distributed_measurable[OF X] by (rule prob_space_distr) | 
|    817  |    837  | 
|    818   assume "emeasure MX {x \<in> space MX. Px x \<noteq> 0} = 0" |    838   have " - log b (measure MX {x \<in> space MX. Px x \<noteq> 0}) = | 
|    819   with Px have "AE x in MX. Px x = 0" |         | 
|    820     by (intro AE_I[OF subset_refl]) (auto simp: borel_measurable_ereal_iff) |         | 
|    821   moreover |         | 
|    822   from X.emeasure_space_1 have "(\<integral>\<^sup>+x. Px x \<partial>MX) = 1" |         | 
|    823     unfolding distributed_distr_eq_density[OF X] using Px |         | 
|    824     by (subst (asm) emeasure_density) |         | 
|    825        (auto simp: borel_measurable_ereal_iff intro!: integral_cong cong: nn_integral_cong) |         | 
|    826   ultimately show False |         | 
|    827     by (simp add: nn_integral_cong_AE) |         | 
|    828 qed |         | 
|    829  |         | 
|    830 lemma (in information_space) entropy_le: |         | 
|    831   fixes Px :: "'b \<Rightarrow> real" and MX :: "'b measure" |         | 
|    832   assumes X[measurable]: "distributed M MX X Px" |         | 
|    833   and fin: "emeasure MX {x \<in> space MX. Px x \<noteq> 0} \<noteq> \<infinity>" |         | 
|    834   and int: "integrable MX (\<lambda>x. - Px x * log b (Px x))" |         | 
|    835   shows "entropy b MX X \<le> log b (measure MX {x \<in> space MX. Px x \<noteq> 0})" |         | 
|    836 proof - |         | 
|    837   note Px = distributed_borel_measurable[OF X] distributed_AE[OF X] |         | 
|    838   interpret X: prob_space "distr M MX X" |         | 
|    839     using distributed_measurable[OF X] by (rule prob_space_distr) |         | 
|    840  |         | 
|    841   have " - log b (measure MX {x \<in> space MX. Px x \<noteq> 0}) =  |         | 
|    842     - log b (\<integral> x. indicator {x \<in> space MX. Px x \<noteq> 0} x \<partial>MX)" |    839     - log b (\<integral> x. indicator {x \<in> space MX. Px x \<noteq> 0} x \<partial>MX)" | 
|    843     using Px fin |    840     using Px Px_nn fin by (auto simp: measure_def) | 
|    844     by (auto simp: measure_def borel_measurable_ereal_iff) |         | 
|    845   also have "- log b (\<integral> x. indicator {x \<in> space MX. Px x \<noteq> 0} x \<partial>MX) = - log b (\<integral> x. 1 / Px x \<partial>distr M MX X)" |    841   also have "- log b (\<integral> x. indicator {x \<in> space MX. Px x \<noteq> 0} x \<partial>MX) = - log b (\<integral> x. 1 / Px x \<partial>distr M MX X)" | 
|    846     unfolding distributed_distr_eq_density[OF X] using Px |    842     unfolding distributed_distr_eq_density[OF X] using Px Px_nn | 
|    847     apply (intro arg_cong[where f="log b"] arg_cong[where f=uminus]) |    843     apply (intro arg_cong[where f="log b"] arg_cong[where f=uminus]) | 
|    848     by (subst integral_density) (auto simp: borel_measurable_ereal_iff simp del: integral_indicator intro!: integral_cong) |    844     by (subst integral_density) (auto simp del: integral_indicator intro!: integral_cong) | 
|    849   also have "\<dots> \<le> (\<integral> x. - log b (1 / Px x) \<partial>distr M MX X)" |    845   also have "\<dots> \<le> (\<integral> x. - log b (1 / Px x) \<partial>distr M MX X)" | 
|    850   proof (rule X.jensens_inequality[of "\<lambda>x. 1 / Px x" "{0<..}" 0 1 "\<lambda>x. - log b x"]) |    846   proof (rule X.jensens_inequality[of "\<lambda>x. 1 / Px x" "{0<..}" 0 1 "\<lambda>x. - log b x"]) | 
|    851     show "AE x in distr M MX X. 1 / Px x \<in> {0<..}" |    847     show "AE x in distr M MX X. 1 / Px x \<in> {0<..}" | 
|    852       unfolding distributed_distr_eq_density[OF X] |    848       unfolding distributed_distr_eq_density[OF X] | 
|    853       using Px by (auto simp: AE_density) |    849       using Px by (auto simp: AE_density) | 
|    854     have [simp]: "\<And>x. x \<in> space MX \<Longrightarrow> ereal (if Px x = 0 then 0 else 1) = indicator {x \<in> space MX. Px x \<noteq> 0} x" |    850     have [simp]: "\<And>x. x \<in> space MX \<Longrightarrow> ennreal (if Px x = 0 then 0 else 1) = indicator {x \<in> space MX. Px x \<noteq> 0} x" | 
|    855       by (auto simp: one_ereal_def) |    851       by (auto simp: one_ennreal_def) | 
|    856     have "(\<integral>\<^sup>+ x. max 0 (ereal (- (if Px x = 0 then 0 else 1))) \<partial>MX) = (\<integral>\<^sup>+ x. 0 \<partial>MX)" |    852     have "(\<integral>\<^sup>+ x. ennreal (- (if Px x = 0 then 0 else 1)) \<partial>MX) = (\<integral>\<^sup>+ x. 0 \<partial>MX)" | 
|    857       by (intro nn_integral_cong) (auto split: split_max) |    853       by (intro nn_integral_cong) (auto simp: ennreal_neg) | 
|    858     then show "integrable (distr M MX X) (\<lambda>x. 1 / Px x)" |    854     then show "integrable (distr M MX X) (\<lambda>x. 1 / Px x)" | 
|    859       unfolding distributed_distr_eq_density[OF X] using Px |    855       unfolding distributed_distr_eq_density[OF X] using Px | 
|    860       by (auto simp: nn_integral_density real_integrable_def borel_measurable_ereal_iff fin nn_integral_max_0 |    856       by (auto simp: nn_integral_density real_integrable_def fin ennreal_neg ennreal_mult[symmetric] | 
|    861               cong: nn_integral_cong) |    857               cong: nn_integral_cong) | 
|    862     have "integrable MX (\<lambda>x. Px x * log b (1 / Px x)) = |    858     have "integrable MX (\<lambda>x. Px x * log b (1 / Px x)) = | 
|    863       integrable MX (\<lambda>x. - Px x * log b (Px x))" |    859       integrable MX (\<lambda>x. - Px x * log b (Px x))" | 
|    864       using Px |    860       using Px | 
|    865       by (intro integrable_cong_AE) |    861       by (intro integrable_cong_AE) (auto simp: log_divide_eq less_le) | 
|    866          (auto simp: borel_measurable_ereal_iff log_divide_eq |         | 
|    867                   intro!: measurable_If) |         | 
|    868     then show "integrable (distr M MX X) (\<lambda>x. - log b (1 / Px x))" |    862     then show "integrable (distr M MX X) (\<lambda>x. - log b (1 / Px x))" | 
|    869       unfolding distributed_distr_eq_density[OF X] |    863       unfolding distributed_distr_eq_density[OF X] | 
|    870       using Px int |    864       using Px int | 
|    871       by (subst integrable_real_density) (auto simp: borel_measurable_ereal_iff) |    865       by (subst integrable_real_density) auto | 
|    872   qed (auto simp: minus_log_convex[OF b_gt_1]) |    866   qed (auto simp: minus_log_convex[OF b_gt_1]) | 
|    873   also have "\<dots> = (\<integral> x. log b (Px x) \<partial>distr M MX X)" |    867   also have "\<dots> = (\<integral> x. log b (Px x) \<partial>distr M MX X)" | 
|    874     unfolding distributed_distr_eq_density[OF X] using Px |    868     unfolding distributed_distr_eq_density[OF X] using Px | 
|    875     by (intro integral_cong_AE) (auto simp: AE_density log_divide_eq) |    869     by (intro integral_cong_AE) (auto simp: AE_density log_divide_eq) | 
|    876   also have "\<dots> = - entropy b MX X" |    870   also have "\<dots> = - entropy b MX X" | 
|    877     unfolding distributed_distr_eq_density[OF X] using Px |    871     unfolding distributed_distr_eq_density[OF X] using Px | 
|    878     by (subst entropy_distr[OF X]) (auto simp: borel_measurable_ereal_iff integral_density) |    872     by (subst entropy_distr[OF X]) (auto simp: integral_density) | 
|    879   finally show ?thesis |    873   finally show ?thesis | 
|    880     by simp |    874     by simp | 
|    881 qed |    875 qed | 
|    882  |    876  | 
|    883 lemma (in information_space) entropy_le_space: |    877 lemma (in information_space) entropy_le_space: | 
|    884   fixes Px :: "'b \<Rightarrow> real" and MX :: "'b measure" |    878   fixes Px :: "'b \<Rightarrow> real" and MX :: "'b measure" | 
|    885   assumes X: "distributed M MX X Px" |    879   assumes X: "distributed M MX X Px" and Px_nn[simp]: "\<And>x. x \<in> space MX \<Longrightarrow> 0 \<le> Px x" | 
|    886   and fin: "finite_measure MX" |    880   and fin: "finite_measure MX" | 
|    887   and int: "integrable MX (\<lambda>x. - Px x * log b (Px x))" |    881   and int: "integrable MX (\<lambda>x. - Px x * log b (Px x))" | 
|    888   shows "entropy b MX X \<le> log b (measure MX (space MX))" |    882   shows "entropy b MX X \<le> log b (measure MX (space MX))" | 
|    889 proof - |    883 proof - | 
|    890   note Px = distributed_borel_measurable[OF X] distributed_AE[OF X] |    884   note Px = distributed_borel_measurable[OF X] | 
|    891   interpret finite_measure MX by fact |    885   interpret finite_measure MX by fact | 
|    892   have "entropy b MX X \<le> log b (measure MX {x \<in> space MX. Px x \<noteq> 0})" |    886   have "entropy b MX X \<le> log b (measure MX {x \<in> space MX. Px x \<noteq> 0})" | 
|    893     using int X by (intro entropy_le) auto |    887     using int X by (intro entropy_le) auto | 
|    894   also have "\<dots> \<le> log b (measure MX (space MX))" |    888   also have "\<dots> \<le> log b (measure MX (space MX))" | 
|    895     using Px distributed_imp_emeasure_nonzero[OF X] |    889     using Px distributed_imp_emeasure_nonzero[OF X] | 
|    896     by (intro log_le) |    890     by (intro log_le) | 
|    897        (auto intro!: borel_measurable_ereal_iff finite_measure_mono b_gt_1 |    891        (auto intro!: finite_measure_mono b_gt_1 less_le[THEN iffD2] | 
|    898                      less_le[THEN iffD2] measure_nonneg simp: emeasure_eq_measure) |    892              simp: emeasure_eq_measure cong: conj_cong) | 
|    899   finally show ?thesis . |    893   finally show ?thesis . | 
|    900 qed |    894 qed | 
|    901  |    895  | 
|    902 lemma (in information_space) entropy_uniform: |    896 lemma (in information_space) entropy_uniform: | 
|    903   assumes X: "distributed M MX X (\<lambda>x. indicator A x / measure MX A)" (is "distributed _ _ _ ?f") |    897   assumes X: "distributed M MX X (\<lambda>x. indicator A x / measure MX A)" (is "distributed _ _ _ ?f") | 
|    959     (count_space (X ` space M)) (count_space (Y ` space M)) (count_space (Z ` space M)) X Y Z" |    953     (count_space (X ` space M)) (count_space (Y ` space M)) (count_space (Z ` space M)) X Y Z" | 
|    960  |    954  | 
|    961 lemma (in information_space) |    955 lemma (in information_space) | 
|    962   assumes S: "sigma_finite_measure S" and T: "sigma_finite_measure T" and P: "sigma_finite_measure P" |    956   assumes S: "sigma_finite_measure S" and T: "sigma_finite_measure T" and P: "sigma_finite_measure P" | 
|    963   assumes Px[measurable]: "distributed M S X Px" |    957   assumes Px[measurable]: "distributed M S X Px" | 
|         |    958     and Px_nn[simp]: "\<And>x. x \<in> space S \<Longrightarrow> 0 \<le> Px x" | 
|    964   assumes Pz[measurable]: "distributed M P Z Pz" |    959   assumes Pz[measurable]: "distributed M P Z Pz" | 
|         |    960     and Pz_nn[simp]: "\<And>z. z \<in> space P \<Longrightarrow> 0 \<le> Pz z" | 
|    965   assumes Pyz[measurable]: "distributed M (T \<Otimes>\<^sub>M P) (\<lambda>x. (Y x, Z x)) Pyz" |    961   assumes Pyz[measurable]: "distributed M (T \<Otimes>\<^sub>M P) (\<lambda>x. (Y x, Z x)) Pyz" | 
|         |    962     and Pyz_nn[simp]: "\<And>y z. y \<in> space T \<Longrightarrow> z \<in> space P \<Longrightarrow> 0 \<le> Pyz (y, z)" | 
|    966   assumes Pxz[measurable]: "distributed M (S \<Otimes>\<^sub>M P) (\<lambda>x. (X x, Z x)) Pxz" |    963   assumes Pxz[measurable]: "distributed M (S \<Otimes>\<^sub>M P) (\<lambda>x. (X x, Z x)) Pxz" | 
|         |    964     and Pxz_nn[simp]: "\<And>x z. x \<in> space S \<Longrightarrow> z \<in> space P \<Longrightarrow> 0 \<le> Pxz (x, z)" | 
|    967   assumes Pxyz[measurable]: "distributed M (S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P) (\<lambda>x. (X x, Y x, Z x)) Pxyz" |    965   assumes Pxyz[measurable]: "distributed M (S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P) (\<lambda>x. (X x, Y x, Z x)) Pxyz" | 
|         |    966     and Pxyz_nn[simp]: "\<And>x y z. x \<in> space S \<Longrightarrow> y \<in> space T \<Longrightarrow> z \<in> space P \<Longrightarrow> 0 \<le> Pxyz (x, y, z)" | 
|    968   assumes I1: "integrable (S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P) (\<lambda>(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Px x * Pyz (y, z))))" |    967   assumes I1: "integrable (S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P) (\<lambda>(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Px x * Pyz (y, z))))" | 
|    969   assumes I2: "integrable (S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P) (\<lambda>(x, y, z). Pxyz (x, y, z) * log b (Pxz (x, z) / (Px x * Pz z)))" |    968   assumes I2: "integrable (S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P) (\<lambda>(x, y, z). Pxyz (x, y, z) * log b (Pxz (x, z) / (Px x * Pz z)))" | 
|    970   shows conditional_mutual_information_generic_eq: "conditional_mutual_information b S T P X Y Z |    969   shows conditional_mutual_information_generic_eq: "conditional_mutual_information b S T P X Y Z | 
|    971     = (\<integral>(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Pxz (x, z) * (Pyz (y,z) / Pz z))) \<partial>(S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P))" (is "?eq") |    970     = (\<integral>(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Pxz (x, z) * (Pyz (y,z) / Pz z))) \<partial>(S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P))" (is "?eq") | 
|    972     and conditional_mutual_information_generic_nonneg: "0 \<le> conditional_mutual_information b S T P X Y Z" (is "?nonneg") |    971     and conditional_mutual_information_generic_nonneg: "0 \<le> conditional_mutual_information b S T P X Y Z" (is "?nonneg") | 
|    973 proof - |    972 proof - | 
|         |    973   have [measurable]: "Px \<in> S \<rightarrow>\<^sub>M borel" | 
|         |    974     using Px Px_nn by (intro distributed_real_measurable) | 
|         |    975   have [measurable]: "Pz \<in> P \<rightarrow>\<^sub>M borel" | 
|         |    976     using Pz Pz_nn by (intro distributed_real_measurable) | 
|         |    977   have measurable_Pyz[measurable]: "Pyz \<in> (T \<Otimes>\<^sub>M P) \<rightarrow>\<^sub>M borel" | 
|         |    978     using Pyz Pyz_nn by (intro distributed_real_measurable) (auto simp: space_pair_measure) | 
|         |    979   have measurable_Pxz[measurable]: "Pxz \<in> (S \<Otimes>\<^sub>M P) \<rightarrow>\<^sub>M borel" | 
|         |    980     using Pxz Pxz_nn by (intro distributed_real_measurable) (auto simp: space_pair_measure) | 
|         |    981   have measurable_Pxyz[measurable]: "Pxyz \<in> (S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P) \<rightarrow>\<^sub>M borel" | 
|         |    982     using Pxyz Pxyz_nn by (intro distributed_real_measurable) (auto simp: space_pair_measure) | 
|         |    983  | 
|    974   interpret S: sigma_finite_measure S by fact |    984   interpret S: sigma_finite_measure S by fact | 
|    975   interpret T: sigma_finite_measure T by fact |    985   interpret T: sigma_finite_measure T by fact | 
|    976   interpret P: sigma_finite_measure P by fact |    986   interpret P: sigma_finite_measure P by fact | 
|    977   interpret TP: pair_sigma_finite T P .. |    987   interpret TP: pair_sigma_finite T P .. | 
|    978   interpret SP: pair_sigma_finite S P .. |    988   interpret SP: pair_sigma_finite S P .. | 
|    982   interpret TPS: pair_sigma_finite "T \<Otimes>\<^sub>M P" S .. |    992   interpret TPS: pair_sigma_finite "T \<Otimes>\<^sub>M P" S .. | 
|    983   have TP: "sigma_finite_measure (T \<Otimes>\<^sub>M P)" .. |    993   have TP: "sigma_finite_measure (T \<Otimes>\<^sub>M P)" .. | 
|    984   have SP: "sigma_finite_measure (S \<Otimes>\<^sub>M P)" .. |    994   have SP: "sigma_finite_measure (S \<Otimes>\<^sub>M P)" .. | 
|    985   have YZ: "random_variable (T \<Otimes>\<^sub>M P) (\<lambda>x. (Y x, Z x))" |    995   have YZ: "random_variable (T \<Otimes>\<^sub>M P) (\<lambda>x. (Y x, Z x))" | 
|    986     using Pyz by (simp add: distributed_measurable) |    996     using Pyz by (simp add: distributed_measurable) | 
|    987    |    997  | 
|    988   from Pxz Pxyz have distr_eq: "distr M (S \<Otimes>\<^sub>M P) (\<lambda>x. (X x, Z x)) = |    998   from Pxz Pxyz have distr_eq: "distr M (S \<Otimes>\<^sub>M P) (\<lambda>x. (X x, Z x)) = | 
|    989     distr (distr M (S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P) (\<lambda>x. (X x, Y x, Z x))) (S \<Otimes>\<^sub>M P) (\<lambda>(x, y, z). (x, z))" |    999     distr (distr M (S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P) (\<lambda>x. (X x, Y x, Z x))) (S \<Otimes>\<^sub>M P) (\<lambda>(x, y, z). (x, z))" | 
|    990     by (simp add: comp_def distr_distr) |   1000     by (simp add: comp_def distr_distr) | 
|    991  |   1001  | 
|    992   have "mutual_information b S P X Z = |   1002   have "mutual_information b S P X Z = | 
|    993     (\<integral>x. Pxz x * log b (Pxz x / (Px (fst x) * Pz (snd x))) \<partial>(S \<Otimes>\<^sub>M P))" |   1003     (\<integral>x. Pxz x * log b (Pxz x / (Px (fst x) * Pz (snd x))) \<partial>(S \<Otimes>\<^sub>M P))" | 
|    994     by (rule mutual_information_distr[OF S P Px Pz Pxz]) |   1004     by (rule mutual_information_distr[OF S P Px Px_nn Pz Pz_nn Pxz Pxz_nn]) | 
|    995   also have "\<dots> = (\<integral>(x,y,z). Pxyz (x,y,z) * log b (Pxz (x,z) / (Px x * Pz z)) \<partial>(S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P))" |   1005   also have "\<dots> = (\<integral>(x,y,z). Pxyz (x,y,z) * log b (Pxz (x,z) / (Px x * Pz z)) \<partial>(S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P))" | 
|    996     using b_gt_1 Pxz Px Pz |   1006     using b_gt_1 Pxz Px Pz | 
|    997     by (subst distributed_transform_integral[OF Pxyz Pxz, where T="\<lambda>(x, y, z). (x, z)"]) (auto simp: split_beta') |   1007     by (subst distributed_transform_integral[OF Pxyz _ Pxz _, where T="\<lambda>(x, y, z). (x, z)"]) | 
|         |   1008        (auto simp: split_beta' space_pair_measure) | 
|    998   finally have mi_eq: |   1009   finally have mi_eq: | 
|    999     "mutual_information b S P X Z = (\<integral>(x,y,z). Pxyz (x,y,z) * log b (Pxz (x,z) / (Px x * Pz z)) \<partial>(S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P))" . |   1010     "mutual_information b S P X Z = (\<integral>(x,y,z). Pxyz (x,y,z) * log b (Pxz (x,z) / (Px x * Pz z)) \<partial>(S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P))" . | 
|   1000    |   1011  | 
|   1001   have ae1: "AE x in S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P. Px (fst x) = 0 \<longrightarrow> Pxyz x = 0" |   1012   have ae1: "AE x in S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P. Px (fst x) = 0 \<longrightarrow> Pxyz x = 0" | 
|   1002     by (intro subdensity_real[of fst, OF _ Pxyz Px]) auto |   1013     by (intro subdensity_real[of fst, OF _ Pxyz Px]) (auto simp: space_pair_measure) | 
|   1003   moreover have ae2: "AE x in S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P. Pz (snd (snd x)) = 0 \<longrightarrow> Pxyz x = 0" |   1014   moreover have ae2: "AE x in S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P. Pz (snd (snd x)) = 0 \<longrightarrow> Pxyz x = 0" | 
|   1004     by (intro subdensity_real[of "\<lambda>x. snd (snd x)", OF _ Pxyz Pz]) auto |   1015     by (intro subdensity_real[of "\<lambda>x. snd (snd x)", OF _ Pxyz Pz]) (auto simp: space_pair_measure) | 
|   1005   moreover have ae3: "AE x in S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P. Pxz (fst x, snd (snd x)) = 0 \<longrightarrow> Pxyz x = 0" |   1016   moreover have ae3: "AE x in S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P. Pxz (fst x, snd (snd x)) = 0 \<longrightarrow> Pxyz x = 0" | 
|   1006     by (intro subdensity_real[of "\<lambda>x. (fst x, snd (snd x))", OF _ Pxyz Pxz]) auto |   1017     by (intro subdensity_real[of "\<lambda>x. (fst x, snd (snd x))", OF _ Pxyz Pxz]) (auto simp: space_pair_measure) | 
|   1007   moreover have ae4: "AE x in S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P. Pyz (snd x) = 0 \<longrightarrow> Pxyz x = 0" |   1018   moreover have ae4: "AE x in S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P. Pyz (snd x) = 0 \<longrightarrow> Pxyz x = 0" | 
|   1008     by (intro subdensity_real[of snd, OF _ Pxyz Pyz]) auto |   1019     by (intro subdensity_real[of snd, OF _ Pxyz Pyz]) (auto simp: space_pair_measure) | 
|   1009   moreover have ae5: "AE x in S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P. 0 \<le> Px (fst x)" |         | 
|   1010     using Px by (intro STP.AE_pair_measure) (auto simp: comp_def dest: distributed_real_AE) |         | 
|   1011   moreover have ae6: "AE x in S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P. 0 \<le> Pyz (snd x)" |         | 
|   1012     using Pyz by (intro STP.AE_pair_measure) (auto simp: comp_def dest: distributed_real_AE) |         | 
|   1013   moreover have ae7: "AE x in S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P. 0 \<le> Pz (snd (snd x))" |         | 
|   1014     using Pz Pz[THEN distributed_real_measurable] |         | 
|   1015     by (auto intro!: TP.AE_pair_measure STP.AE_pair_measure AE_I2[of S] dest: distributed_real_AE) |         | 
|   1016   moreover have ae8: "AE x in S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P. 0 \<le> Pxz (fst x, snd (snd x))" |         | 
|   1017     using Pxz[THEN distributed_real_AE, THEN SP.AE_pair] |         | 
|   1018     by (auto intro!: TP.AE_pair_measure STP.AE_pair_measure) |         | 
|   1019   moreover note Pxyz[THEN distributed_real_AE] |         | 
|   1020   ultimately have ae: "AE x in S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P. |   1020   ultimately have ae: "AE x in S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P. | 
|   1021     Pxyz x * log b (Pxyz x / (Px (fst x) * Pyz (snd x))) - |   1021     Pxyz x * log b (Pxyz x / (Px (fst x) * Pyz (snd x))) - | 
|   1022     Pxyz x * log b (Pxz (fst x, snd (snd x)) / (Px (fst x) * Pz (snd (snd x)))) = |   1022     Pxyz x * log b (Pxz (fst x, snd (snd x)) / (Px (fst x) * Pz (snd (snd x)))) = | 
|   1023     Pxyz x * log b (Pxyz x * Pz (snd (snd x)) / (Pxz (fst x, snd (snd x)) * Pyz (snd x))) " |   1023     Pxyz x * log b (Pxyz x * Pz (snd (snd x)) / (Pxz (fst x, snd (snd x)) * Pyz (snd x))) " | 
|         |   1024     using AE_space | 
|   1024   proof eventually_elim |   1025   proof eventually_elim | 
|   1025     case (elim x) |   1026     case (elim x) | 
|   1026     show ?case |   1027     show ?case | 
|   1027     proof cases |   1028     proof cases | 
|   1028       assume "Pxyz x \<noteq> 0" |   1029       assume "Pxyz x \<noteq> 0" | 
|   1029       with elim have "0 < Px (fst x)" "0 < Pz (snd (snd x))" "0 < Pxz (fst x, snd (snd x))" |   1030       with elim have "0 < Px (fst x)" "0 < Pz (snd (snd x))" "0 < Pxz (fst x, snd (snd x))" | 
|   1030         "0 < Pyz (snd x)" "0 < Pxyz x" |   1031         "0 < Pyz (snd x)" "0 < Pxyz x" | 
|   1031         by auto |   1032         by (auto simp: space_pair_measure less_le) | 
|   1032       then show ?thesis |   1033       then show ?thesis | 
|   1033         using b_gt_1 by (simp add: log_simps less_imp_le field_simps) |   1034         using b_gt_1 by (simp add: log_simps less_imp_le field_simps) | 
|   1034     qed simp |   1035     qed simp | 
|   1035   qed |   1036   qed | 
|   1036   with I1 I2 show ?eq |   1037   with I1 I2 show ?eq | 
|   1037     unfolding conditional_mutual_information_def |   1038     unfolding conditional_mutual_information_def | 
|   1038     apply (subst mi_eq) |   1039     apply (subst mi_eq) | 
|   1039     apply (subst mutual_information_distr[OF S TP Px Pyz Pxyz]) |   1040     apply (subst mutual_information_distr[OF S TP Px Px_nn Pyz _ Pxyz]) | 
|         |   1041     apply (auto simp: space_pair_measure) | 
|   1040     apply (subst integral_diff[symmetric]) |   1042     apply (subst integral_diff[symmetric]) | 
|   1041     apply (auto intro!: integral_cong_AE simp: split_beta' simp del: integral_diff) |   1043     apply (auto intro!: integral_cong_AE simp: split_beta' simp del: integral_diff) | 
|   1042     done |   1044     done | 
|   1043  |   1045  | 
|   1044   let ?P = "density (S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P) Pxyz" |   1046   let ?P = "density (S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P) Pxyz" | 
|   1051     unfolding distributed_distr_eq_density[OF Pyz, symmetric] |   1053     unfolding distributed_distr_eq_density[OF Pyz, symmetric] | 
|   1052     by (rule prob_space_distr) simp |   1054     by (rule prob_space_distr) simp | 
|   1053  |   1055  | 
|   1054   let ?f = "\<lambda>(x, y, z). Pxz (x, z) * (Pyz (y, z) / Pz z) / Pxyz (x, y, z)" |   1056   let ?f = "\<lambda>(x, y, z). Pxz (x, z) * (Pyz (y, z) / Pz z) / Pxyz (x, y, z)" | 
|   1055  |   1057  | 
|   1056   from subdensity_real[of snd, OF _ Pyz Pz] |   1058   from subdensity_real[of snd, OF _ Pyz Pz _ AE_I2 AE_I2] | 
|   1057   have aeX1: "AE x in T \<Otimes>\<^sub>M P. Pz (snd x) = 0 \<longrightarrow> Pyz x = 0" by (auto simp: comp_def) |   1059   have aeX1: "AE x in T \<Otimes>\<^sub>M P. Pz (snd x) = 0 \<longrightarrow> Pyz x = 0" | 
|         |   1060     by (auto simp: comp_def space_pair_measure) | 
|   1058   have aeX2: "AE x in T \<Otimes>\<^sub>M P. 0 \<le> Pz (snd x)" |   1061   have aeX2: "AE x in T \<Otimes>\<^sub>M P. 0 \<le> Pz (snd x)" | 
|   1059     using Pz by (intro TP.AE_pair_measure) (auto simp: comp_def dest: distributed_real_AE) |   1062     using Pz by (intro TP.AE_pair_measure) (auto simp: comp_def) | 
|   1060  |   1063  | 
|   1061   have aeX3: "AE y in T \<Otimes>\<^sub>M P. (\<integral>\<^sup>+ x. ereal (Pxz (x, snd y)) \<partial>S) = ereal (Pz (snd y))" |   1064   have aeX3: "AE y in T \<Otimes>\<^sub>M P. (\<integral>\<^sup>+ x. ennreal (Pxz (x, snd y)) \<partial>S) = ennreal (Pz (snd y))" | 
|   1062     using Pz distributed_marginal_eq_joint2[OF P S Pz Pxz] |   1065     using Pz distributed_marginal_eq_joint2[OF P S Pz Pxz] | 
|   1063     by (intro TP.AE_pair_measure) (auto dest: distributed_real_AE) |   1066     by (intro TP.AE_pair_measure) auto | 
|   1064  |   1067  | 
|   1065   have "(\<integral>\<^sup>+ x. ?f x \<partial>?P) \<le> (\<integral>\<^sup>+ (x, y, z). Pxz (x, z) * (Pyz (y, z) / Pz z) \<partial>(S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P))" |   1068   have "(\<integral>\<^sup>+ x. ?f x \<partial>?P) \<le> (\<integral>\<^sup>+ (x, y, z). Pxz (x, z) * (Pyz (y, z) / Pz z) \<partial>(S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P))" | 
|   1066     apply (subst nn_integral_density) |   1069     by (subst nn_integral_density) | 
|   1067     apply simp |   1070        (auto intro!: nn_integral_mono simp: space_pair_measure ennreal_mult[symmetric]) | 
|   1068     apply (rule distributed_AE[OF Pxyz]) |   1071   also have "\<dots> = (\<integral>\<^sup>+(y, z). (\<integral>\<^sup>+ x. ennreal (Pxz (x, z)) * ennreal (Pyz (y, z) / Pz z) \<partial>S) \<partial>(T \<Otimes>\<^sub>M P))" | 
|   1069     apply auto [] |   1072     by (subst STP.nn_integral_snd[symmetric]) | 
|   1070     apply (rule nn_integral_mono_AE) |   1073        (auto simp add: split_beta' ennreal_mult[symmetric] space_pair_measure intro!: nn_integral_cong) | 
|   1071     using ae5 ae6 ae7 ae8 |   1074   also have "\<dots> = (\<integral>\<^sup>+x. ennreal (Pyz x) * 1 \<partial>T \<Otimes>\<^sub>M P)" | 
|         |   1075     apply (rule nn_integral_cong_AE) | 
|         |   1076     using aeX1 aeX2 aeX3 AE_space | 
|   1072     apply eventually_elim |   1077     apply eventually_elim | 
|   1073     apply auto |   1078   proof (case_tac x, simp add: space_pair_measure) | 
|   1074     done |   1079     fix a b assume "Pz b = 0 \<longrightarrow> Pyz (a, b) = 0" "a \<in> space T \<and> b \<in> space P" | 
|   1075   also have "\<dots> = (\<integral>\<^sup>+(y, z). \<integral>\<^sup>+ x. ereal (Pxz (x, z)) * ereal (Pyz (y, z) / Pz z) \<partial>S \<partial>T \<Otimes>\<^sub>M P)" |   1080       "(\<integral>\<^sup>+ x. ennreal (Pxz (x, b)) \<partial>S) = ennreal (Pz b)" | 
|   1076     by (subst STP.nn_integral_snd[symmetric]) (auto simp add: split_beta') |   1081     then show "(\<integral>\<^sup>+ x. ennreal (Pxz (x, b)) * ennreal (Pyz (a, b) / Pz b) \<partial>S) = ennreal (Pyz (a, b))" | 
|   1077   also have "\<dots> = (\<integral>\<^sup>+x. ereal (Pyz x) * 1 \<partial>T \<Otimes>\<^sub>M P)" |   1082       by (subst nn_integral_multc) (auto split: prod.split simp: ennreal_mult[symmetric]) | 
|   1078     apply (rule nn_integral_cong_AE) |         | 
|   1079     using aeX1 aeX2 aeX3 distributed_AE[OF Pyz] AE_space |         | 
|   1080     apply eventually_elim |         | 
|   1081   proof (case_tac x, simp del: times_ereal.simps add: space_pair_measure) |         | 
|   1082     fix a b assume "Pz b = 0 \<longrightarrow> Pyz (a, b) = 0" "0 \<le> Pz b" "a \<in> space T \<and> b \<in> space P" |         | 
|   1083       "(\<integral>\<^sup>+ x. ereal (Pxz (x, b)) \<partial>S) = ereal (Pz b)" "0 \<le> Pyz (a, b)"  |         | 
|   1084     then show "(\<integral>\<^sup>+ x. ereal (Pxz (x, b)) * ereal (Pyz (a, b) / Pz b) \<partial>S) = ereal (Pyz (a, b))" |         | 
|   1085       by (subst nn_integral_multc) |         | 
|   1086          (auto split: prod.split) |         | 
|   1087   qed |   1083   qed | 
|   1088   also have "\<dots> = 1" |   1084   also have "\<dots> = 1" | 
|   1089     using Q.emeasure_space_1 distributed_AE[OF Pyz] distributed_distr_eq_density[OF Pyz] |   1085     using Q.emeasure_space_1 distributed_distr_eq_density[OF Pyz] | 
|   1090     by (subst nn_integral_density[symmetric]) auto |   1086     by (subst nn_integral_density[symmetric]) auto | 
|   1091   finally have le1: "(\<integral>\<^sup>+ x. ?f x \<partial>?P) \<le> 1" . |   1087   finally have le1: "(\<integral>\<^sup>+ x. ?f x \<partial>?P) \<le> 1" . | 
|   1092   also have "\<dots> < \<infinity>" by simp |   1088   also have "\<dots> < \<infinity>" by simp | 
|   1093   finally have fin: "(\<integral>\<^sup>+ x. ?f x \<partial>?P) \<noteq> \<infinity>" by simp |   1089   finally have fin: "(\<integral>\<^sup>+ x. ?f x \<partial>?P) \<noteq> \<infinity>" by simp | 
|   1094  |   1090  | 
|   1095   have pos: "(\<integral>\<^sup>+x. ?f x \<partial>?P) \<noteq> 0" |   1091   have pos: "(\<integral>\<^sup>+x. ?f x \<partial>?P) \<noteq> 0" | 
|   1096     apply (subst nn_integral_density) |   1092     apply (subst nn_integral_density) | 
|   1097     apply simp |   1093     apply (simp_all add: split_beta') | 
|   1098     apply (rule distributed_AE[OF Pxyz]) |         | 
|   1099     apply auto [] |         | 
|   1100     apply (simp add: split_beta') |         | 
|   1101   proof |   1094   proof | 
|   1102     let ?g = "\<lambda>x. ereal (if Pxyz x = 0 then 0 else Pxz (fst x, snd (snd x)) * Pyz (snd x) / Pz (snd (snd x)))" |   1095     let ?g = "\<lambda>x. ennreal (Pxyz x) * (Pxz (fst x, snd (snd x)) * Pyz (snd x) / (Pz (snd (snd x)) * Pxyz x))" | 
|   1103     assume "(\<integral>\<^sup>+x. ?g x \<partial>(S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P)) = 0" |   1096     assume "(\<integral>\<^sup>+x. ?g x \<partial>(S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P)) = 0" | 
|   1104     then have "AE x in S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P. ?g x \<le> 0" |   1097     then have "AE x in S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P. ?g x = 0" | 
|   1105       by (intro nn_integral_0_iff_AE[THEN iffD1]) auto |   1098       by (intro nn_integral_0_iff_AE[THEN iffD1]) auto | 
|   1106     then have "AE x in S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P. Pxyz x = 0" |   1099     then have "AE x in S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P. Pxyz x = 0" | 
|   1107       using ae1 ae2 ae3 ae4 ae5 ae6 ae7 ae8 Pxyz[THEN distributed_real_AE] |   1100       using ae1 ae2 ae3 ae4 AE_space | 
|   1108       by eventually_elim (auto split: if_split_asm simp: mult_le_0_iff divide_le_0_iff) |   1101       by eventually_elim (auto split: if_split_asm simp: mult_le_0_iff divide_le_0_iff space_pair_measure) | 
|   1109     then have "(\<integral>\<^sup>+ x. ereal (Pxyz x) \<partial>S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P) = 0" |   1102     then have "(\<integral>\<^sup>+ x. ennreal (Pxyz x) \<partial>S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P) = 0" | 
|   1110       by (subst nn_integral_cong_AE[of _ "\<lambda>x. 0"]) auto |   1103       by (subst nn_integral_cong_AE[of _ "\<lambda>x. 0"]) auto | 
|   1111     with P.emeasure_space_1 show False |   1104     with P.emeasure_space_1 show False | 
|   1112       by (subst (asm) emeasure_density) (auto cong: nn_integral_cong) |   1105       by (subst (asm) emeasure_density) (auto cong: nn_integral_cong) | 
|   1113   qed |   1106   qed | 
|   1114  |   1107  | 
|   1115   have neg: "(\<integral>\<^sup>+ x. - ?f x \<partial>?P) = 0" |   1108   have neg: "(\<integral>\<^sup>+ x. - ?f x \<partial>?P) = 0" | 
|   1116     apply (rule nn_integral_0_iff_AE[THEN iffD2]) |   1109     apply (rule nn_integral_0_iff_AE[THEN iffD2]) | 
|   1117     apply simp |   1110     apply simp | 
|   1118     apply (subst AE_density) |   1111     apply (subst AE_density) | 
|   1119     apply simp |   1112     apply (auto simp: space_pair_measure ennreal_neg) | 
|   1120     using ae5 ae6 ae7 ae8 |         | 
|   1121     apply eventually_elim |         | 
|   1122     apply auto |         | 
|   1123     done |   1113     done | 
|   1124  |   1114  | 
|   1125   have I3: "integrable (S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P) (\<lambda>(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Pxz (x, z) * (Pyz (y,z) / Pz z))))" |   1115   have I3: "integrable (S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P) (\<lambda>(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Pxz (x, z) * (Pyz (y,z) / Pz z))))" | 
|   1126     apply (rule integrable_cong_AE[THEN iffD1, OF _ _ _ integrable_diff[OF I1 I2]]) |   1116     apply (rule integrable_cong_AE[THEN iffD1, OF _ _ _ integrable_diff[OF I1 I2]]) | 
|   1127     using ae |   1117     using ae | 
|   1220     distr (distr M (S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P) (\<lambda>x. (X x, Y x, Z x))) (S \<Otimes>\<^sub>M P) (\<lambda>(x, y, z). (x, z))" |   1222     distr (distr M (S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P) (\<lambda>x. (X x, Y x, Z x))) (S \<Otimes>\<^sub>M P) (\<lambda>(x, y, z). (x, z))" | 
|   1221     by (simp add: distr_distr comp_def) |   1223     by (simp add: distr_distr comp_def) | 
|   1222  |   1224  | 
|   1223   have "mutual_information b S P X Z = |   1225   have "mutual_information b S P X Z = | 
|   1224     (\<integral>x. Pxz x * log b (Pxz x / (Px (fst x) * Pz (snd x))) \<partial>(S \<Otimes>\<^sub>M P))" |   1226     (\<integral>x. Pxz x * log b (Pxz x / (Px (fst x) * Pz (snd x))) \<partial>(S \<Otimes>\<^sub>M P))" | 
|   1225     by (rule mutual_information_distr[OF S P Px Pz Pxz]) |   1227     using Px Px_nn Pz Pz_nn Pxz Pxz_nn | 
|         |   1228     by (rule mutual_information_distr[OF S P]) (auto simp: space_pair_measure) | 
|   1226   also have "\<dots> = (\<integral>(x,y,z). Pxyz (x,y,z) * log b (Pxz (x,z) / (Px x * Pz z)) \<partial>(S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P))" |   1229   also have "\<dots> = (\<integral>(x,y,z). Pxyz (x,y,z) * log b (Pxz (x,z) / (Px x * Pz z)) \<partial>(S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P))" | 
|   1227     using b_gt_1 Pxz Px Pz |   1230     using b_gt_1 Pxz Pxz_nn Pxyz Pxyz_nn | 
|   1228     by (subst distributed_transform_integral[OF Pxyz Pxz, where T="\<lambda>(x, y, z). (x, z)"]) |   1231     by (subst distributed_transform_integral[OF Pxyz _ Pxz, where T="\<lambda>(x, y, z). (x, z)"]) | 
|   1229        (auto simp: split_beta') |   1232        (auto simp: split_beta') | 
|   1230   finally have mi_eq: |   1233   finally have mi_eq: | 
|   1231     "mutual_information b S P X Z = (\<integral>(x,y,z). Pxyz (x,y,z) * log b (Pxz (x,z) / (Px x * Pz z)) \<partial>(S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P))" . |   1234     "mutual_information b S P X Z = (\<integral>(x,y,z). Pxyz (x,y,z) * log b (Pxz (x,z) / (Px x * Pz z)) \<partial>(S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P))" . | 
|   1232    |   1235  | 
|   1233   have ae1: "AE x in S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P. Px (fst x) = 0 \<longrightarrow> Pxyz x = 0" |   1236   have ae1: "AE x in S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P. Px (fst x) = 0 \<longrightarrow> Pxyz x = 0" | 
|   1234     by (intro subdensity_real[of fst, OF _ Pxyz Px]) auto |   1237     by (intro subdensity_finite_entropy[of fst, OF _ Fxyz Fx]) auto | 
|   1235   moreover have ae2: "AE x in S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P. Pz (snd (snd x)) = 0 \<longrightarrow> Pxyz x = 0" |   1238   moreover have ae2: "AE x in S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P. Pz (snd (snd x)) = 0 \<longrightarrow> Pxyz x = 0" | 
|   1236     by (intro subdensity_real[of "\<lambda>x. snd (snd x)", OF _ Pxyz Pz]) auto |   1239     by (intro subdensity_finite_entropy[of "\<lambda>x. snd (snd x)", OF _ Fxyz Fz]) auto | 
|   1237   moreover have ae3: "AE x in S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P. Pxz (fst x, snd (snd x)) = 0 \<longrightarrow> Pxyz x = 0" |   1240   moreover have ae3: "AE x in S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P. Pxz (fst x, snd (snd x)) = 0 \<longrightarrow> Pxyz x = 0" | 
|   1238     by (intro subdensity_real[of "\<lambda>x. (fst x, snd (snd x))", OF _ Pxyz Pxz]) auto |   1241     by (intro subdensity_finite_entropy[of "\<lambda>x. (fst x, snd (snd x))", OF _ Fxyz Fxz]) auto | 
|   1239   moreover have ae4: "AE x in S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P. Pyz (snd x) = 0 \<longrightarrow> Pxyz x = 0" |   1242   moreover have ae4: "AE x in S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P. Pyz (snd x) = 0 \<longrightarrow> Pxyz x = 0" | 
|   1240     by (intro subdensity_real[of snd, OF _ Pxyz Pyz]) auto |   1243     by (intro subdensity_finite_entropy[of snd, OF _ Fxyz Fyz]) auto | 
|   1241   moreover have ae5: "AE x in S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P. 0 \<le> Px (fst x)" |         | 
|   1242     using Px by (intro STP.AE_pair_measure) (auto dest: distributed_real_AE) |         | 
|   1243   moreover have ae6: "AE x in S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P. 0 \<le> Pyz (snd x)" |         | 
|   1244     using Pyz by (intro STP.AE_pair_measure) (auto dest: distributed_real_AE) |         | 
|   1245   moreover have ae7: "AE x in S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P. 0 \<le> Pz (snd (snd x))" |         | 
|   1246     using Pz Pz[THEN distributed_real_measurable] by (auto intro!: TP.AE_pair_measure STP.AE_pair_measure AE_I2[of S] dest: distributed_real_AE) |         | 
|   1247   moreover have ae8: "AE x in S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P. 0 \<le> Pxz (fst x, snd (snd x))" |         | 
|   1248     using Pxz[THEN distributed_real_AE, THEN SP.AE_pair] |         | 
|   1249     by (auto intro!: TP.AE_pair_measure STP.AE_pair_measure simp: comp_def) |         | 
|   1250   moreover note ae9 = Pxyz[THEN distributed_real_AE] |         | 
|   1251   ultimately have ae: "AE x in S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P. |   1244   ultimately have ae: "AE x in S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P. | 
|   1252     Pxyz x * log b (Pxyz x / (Px (fst x) * Pyz (snd x))) - |   1245     Pxyz x * log b (Pxyz x / (Px (fst x) * Pyz (snd x))) - | 
|   1253     Pxyz x * log b (Pxz (fst x, snd (snd x)) / (Px (fst x) * Pz (snd (snd x)))) = |   1246     Pxyz x * log b (Pxz (fst x, snd (snd x)) / (Px (fst x) * Pz (snd (snd x)))) = | 
|   1254     Pxyz x * log b (Pxyz x * Pz (snd (snd x)) / (Pxz (fst x, snd (snd x)) * Pyz (snd x))) " |   1247     Pxyz x * log b (Pxyz x * Pz (snd (snd x)) / (Pxz (fst x, snd (snd x)) * Pyz (snd x))) " | 
|         |   1248     using AE_space | 
|   1255   proof eventually_elim |   1249   proof eventually_elim | 
|   1256     case (elim x) |   1250     case (elim x) | 
|   1257     show ?case |   1251     show ?case | 
|   1258     proof cases |   1252     proof cases | 
|   1259       assume "Pxyz x \<noteq> 0" |   1253       assume "Pxyz x \<noteq> 0" | 
|   1260       with elim have "0 < Px (fst x)" "0 < Pz (snd (snd x))" "0 < Pxz (fst x, snd (snd x))" |   1254       with elim have "0 < Px (fst x)" "0 < Pz (snd (snd x))" "0 < Pxz (fst x, snd (snd x))" | 
|   1261         "0 < Pyz (snd x)" "0 < Pxyz x" |   1255         "0 < Pyz (snd x)" "0 < Pxyz x" | 
|   1262         by auto |   1256         using Px_nn Pz_nn Pxz_nn Pyz_nn Pxyz_nn | 
|         |   1257         by (auto simp: space_pair_measure less_le) | 
|   1263       then show ?thesis |   1258       then show ?thesis | 
|   1264         using b_gt_1 by (simp add: log_simps less_imp_le field_simps) |   1259         using b_gt_1 by (simp add: log_simps less_imp_le field_simps) | 
|   1265     qed simp |   1260     qed simp | 
|   1266   qed |   1261   qed | 
|   1267  |   1262  | 
|   1268   have "integrable (S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P) |   1263   have "integrable (S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P) | 
|   1269     (\<lambda>x. Pxyz x * log b (Pxyz x) - Pxyz x * log b (Px (fst x)) - Pxyz x * log b (Pyz (snd x)))" |   1264     (\<lambda>x. Pxyz x * log b (Pxyz x) - Pxyz x * log b (Px (fst x)) - Pxyz x * log b (Pyz (snd x)))" | 
|   1270     using finite_entropy_integrable[OF Fxyz] |   1265     using finite_entropy_integrable[OF Fxyz] | 
|   1271     using finite_entropy_integrable_transform[OF Fx Pxyz, of fst] |   1266     using finite_entropy_integrable_transform[OF Fx Pxyz Pxyz_nn, of fst] | 
|   1272     using finite_entropy_integrable_transform[OF Fyz Pxyz, of snd] |   1267     using finite_entropy_integrable_transform[OF Fyz Pxyz Pxyz_nn, of snd] | 
|   1273     by simp |   1268     by simp | 
|   1274   moreover have "(\<lambda>(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Px x * Pyz (y, z)))) \<in> borel_measurable (S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P)" |   1269   moreover have "(\<lambda>(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Px x * Pyz (y, z)))) \<in> borel_measurable (S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P)" | 
|   1275     using Pxyz Px Pyz by simp |   1270     using Pxyz Px Pyz by simp | 
|   1276   ultimately have I1: "integrable (S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P) (\<lambda>(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Px x * Pyz (y, z))))" |   1271   ultimately have I1: "integrable (S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P) (\<lambda>(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Px x * Pyz (y, z))))" | 
|   1277     apply (rule integrable_cong_AE_imp) |   1272     apply (rule integrable_cong_AE_imp) | 
|   1278     using ae1 ae4 ae5 ae6 ae9 |   1273     using ae1 ae4 AE_space | 
|   1279     by eventually_elim |   1274     by eventually_elim | 
|   1280        (auto simp: log_divide_eq log_mult_eq field_simps zero_less_mult_iff) |   1275        (insert Px_nn Pyz_nn Pxyz_nn, | 
|         |   1276         auto simp: log_divide_eq log_mult_eq field_simps zero_less_mult_iff space_pair_measure less_le) | 
|   1281  |   1277  | 
|   1282   have "integrable (S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P) |   1278   have "integrable (S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P) | 
|   1283     (\<lambda>x. Pxyz x * log b (Pxz (fst x, snd (snd x))) - Pxyz x * log b (Px (fst x)) - Pxyz x * log b (Pz (snd (snd x))))" |   1279     (\<lambda>x. Pxyz x * log b (Pxz (fst x, snd (snd x))) - Pxyz x * log b (Px (fst x)) - Pxyz x * log b (Pz (snd (snd x))))" | 
|   1284     using finite_entropy_integrable_transform[OF Fxz Pxyz, of "\<lambda>x. (fst x, snd (snd x))"] |   1280     using finite_entropy_integrable_transform[OF Fxz Pxyz Pxyz_nn, of "\<lambda>x. (fst x, snd (snd x))"] | 
|   1285     using finite_entropy_integrable_transform[OF Fx Pxyz, of fst] |   1281     using finite_entropy_integrable_transform[OF Fx Pxyz Pxyz_nn, of fst] | 
|   1286     using finite_entropy_integrable_transform[OF Fz Pxyz, of "snd \<circ> snd"] |   1282     using finite_entropy_integrable_transform[OF Fz Pxyz Pxyz_nn, of "snd \<circ> snd"] | 
|   1287     by simp |   1283     by simp | 
|   1288   moreover have "(\<lambda>(x, y, z). Pxyz (x, y, z) * log b (Pxz (x, z) / (Px x * Pz z))) \<in> borel_measurable (S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P)" |   1284   moreover have "(\<lambda>(x, y, z). Pxyz (x, y, z) * log b (Pxz (x, z) / (Px x * Pz z))) \<in> borel_measurable (S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P)" | 
|   1289     using Pxyz Px Pz |   1285     using Pxyz Px Pz | 
|   1290     by auto |   1286     by auto | 
|   1291   ultimately have I2: "integrable (S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P) (\<lambda>(x, y, z). Pxyz (x, y, z) * log b (Pxz (x, z) / (Px x * Pz z)))" |   1287   ultimately have I2: "integrable (S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P) (\<lambda>(x, y, z). Pxyz (x, y, z) * log b (Pxz (x, z) / (Px x * Pz z)))" | 
|   1292     apply (rule integrable_cong_AE_imp) |   1288     apply (rule integrable_cong_AE_imp) | 
|   1293     using ae1 ae2 ae3 ae4 ae5 ae6 ae7 ae8 ae9 |   1289     using ae1 ae2 ae3 ae4 AE_space | 
|   1294     by eventually_elim |   1290     by eventually_elim | 
|   1295        (auto simp: log_divide_eq log_mult_eq field_simps zero_less_mult_iff) |   1291        (insert Px_nn Pz_nn Pxz_nn Pyz_nn Pxyz_nn, | 
|         |   1292          auto simp: log_divide_eq log_mult_eq field_simps zero_less_mult_iff less_le space_pair_measure) | 
|   1296  |   1293  | 
|   1297   from ae I1 I2 show ?eq |   1294   from ae I1 I2 show ?eq | 
|   1298     unfolding conditional_mutual_information_def |   1295     unfolding conditional_mutual_information_def | 
|   1299     apply (subst mi_eq) |   1296     apply (subst mi_eq) | 
|   1300     apply (subst mutual_information_distr[OF S TP Px Pyz Pxyz]) |   1297     apply (subst mutual_information_distr[OF S TP Px Px_nn Pyz Pyz_nn Pxyz Pxyz_nn]) | 
|         |   1298     apply simp | 
|         |   1299     apply simp | 
|         |   1300     apply (simp add: space_pair_measure) | 
|   1301     apply (subst integral_diff[symmetric]) |   1301     apply (subst integral_diff[symmetric]) | 
|   1302     apply (auto intro!: integral_cong_AE simp: split_beta' simp del: integral_diff) |   1302     apply (auto intro!: integral_cong_AE simp: split_beta' simp del: integral_diff) | 
|   1303     done |   1303     done | 
|   1304  |   1304  | 
|   1305   let ?P = "density (S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P) Pxyz" |   1305   let ?P = "density (S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P) Pxyz" | 
|   1310   interpret Q: prob_space ?Q |   1310   interpret Q: prob_space ?Q | 
|   1311     unfolding distributed_distr_eq_density[OF Pyz, symmetric] by (rule prob_space_distr) simp |   1311     unfolding distributed_distr_eq_density[OF Pyz, symmetric] by (rule prob_space_distr) simp | 
|   1312  |   1312  | 
|   1313   let ?f = "\<lambda>(x, y, z). Pxz (x, z) * (Pyz (y, z) / Pz z) / Pxyz (x, y, z)" |   1313   let ?f = "\<lambda>(x, y, z). Pxz (x, z) * (Pyz (y, z) / Pz z) / Pxyz (x, y, z)" | 
|   1314  |   1314  | 
|   1315   from subdensity_real[of snd, OF _ Pyz Pz] |   1315   from subdensity_finite_entropy[of snd, OF _ Fyz Fz] | 
|   1316   have aeX1: "AE x in T \<Otimes>\<^sub>M P. Pz (snd x) = 0 \<longrightarrow> Pyz x = 0" by (auto simp: comp_def) |   1316   have aeX1: "AE x in T \<Otimes>\<^sub>M P. Pz (snd x) = 0 \<longrightarrow> Pyz x = 0" by (auto simp: comp_def) | 
|   1317   have aeX2: "AE x in T \<Otimes>\<^sub>M P. 0 \<le> Pz (snd x)" |   1317   have aeX2: "AE x in T \<Otimes>\<^sub>M P. 0 \<le> Pz (snd x)" | 
|   1318     using Pz by (intro TP.AE_pair_measure) (auto dest: distributed_real_AE) |   1318     using Pz by (intro TP.AE_pair_measure) (auto intro: Pz_nn) | 
|   1319  |   1319  | 
|   1320   have aeX3: "AE y in T \<Otimes>\<^sub>M P. (\<integral>\<^sup>+ x. ereal (Pxz (x, snd y)) \<partial>S) = ereal (Pz (snd y))" |   1320   have aeX3: "AE y in T \<Otimes>\<^sub>M P. (\<integral>\<^sup>+ x. ennreal (Pxz (x, snd y)) \<partial>S) = ennreal (Pz (snd y))" | 
|   1321     using Pz distributed_marginal_eq_joint2[OF P S Pz Pxz] |   1321     using Pz distributed_marginal_eq_joint2[OF P S Pz Pxz] | 
|   1322     by (intro TP.AE_pair_measure) (auto dest: distributed_real_AE) |   1322     by (intro TP.AE_pair_measure) (auto ) | 
|   1323   have "(\<integral>\<^sup>+ x. ?f x \<partial>?P) \<le> (\<integral>\<^sup>+ (x, y, z). Pxz (x, z) * (Pyz (y, z) / Pz z) \<partial>(S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P))" |   1323   have "(\<integral>\<^sup>+ x. ?f x \<partial>?P) \<le> (\<integral>\<^sup>+ (x, y, z). Pxz (x, z) * (Pyz (y, z) / Pz z) \<partial>(S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P))" | 
|   1324     apply (subst nn_integral_density) |   1324     using Px_nn Pz_nn Pxz_nn Pyz_nn Pxyz_nn | 
|   1325     apply (rule distributed_borel_measurable[OF Pxyz]) |   1325     by (subst nn_integral_density) | 
|   1326     apply (rule distributed_AE[OF Pxyz]) |   1326        (auto intro!: nn_integral_mono simp: space_pair_measure ennreal_mult[symmetric]) | 
|   1327     apply simp |   1327   also have "\<dots> = (\<integral>\<^sup>+(y, z). \<integral>\<^sup>+ x. ennreal (Pxz (x, z)) * ennreal (Pyz (y, z) / Pz z) \<partial>S \<partial>T \<Otimes>\<^sub>M P)" | 
|   1328     apply (rule nn_integral_mono_AE) |   1328     using Px_nn Pz_nn Pxz_nn Pyz_nn Pxyz_nn | 
|   1329     using ae5 ae6 ae7 ae8 |   1329     by (subst STP.nn_integral_snd[symmetric]) | 
|         |   1330        (auto simp add: split_beta' ennreal_mult[symmetric] space_pair_measure intro!: nn_integral_cong) | 
|         |   1331   also have "\<dots> = (\<integral>\<^sup>+x. ennreal (Pyz x) * 1 \<partial>T \<Otimes>\<^sub>M P)" | 
|         |   1332     apply (rule nn_integral_cong_AE) | 
|         |   1333     using aeX1 aeX2 aeX3 AE_space | 
|   1330     apply eventually_elim |   1334     apply eventually_elim | 
|   1331     apply auto |   1335   proof (case_tac x, simp add: space_pair_measure) | 
|   1332     done |         | 
|   1333   also have "\<dots> = (\<integral>\<^sup>+(y, z). \<integral>\<^sup>+ x. ereal (Pxz (x, z)) * ereal (Pyz (y, z) / Pz z) \<partial>S \<partial>T \<Otimes>\<^sub>M P)" |         | 
|   1334     by (subst STP.nn_integral_snd[symmetric]) (auto simp add: split_beta') |         | 
|   1335   also have "\<dots> = (\<integral>\<^sup>+x. ereal (Pyz x) * 1 \<partial>T \<Otimes>\<^sub>M P)" |         | 
|   1336     apply (rule nn_integral_cong_AE) |         | 
|   1337     using aeX1 aeX2 aeX3 distributed_AE[OF Pyz] AE_space |         | 
|   1338     apply eventually_elim |         | 
|   1339   proof (case_tac x, simp del: times_ereal.simps add: space_pair_measure) |         | 
|   1340     fix a b assume "Pz b = 0 \<longrightarrow> Pyz (a, b) = 0" "0 \<le> Pz b" "a \<in> space T \<and> b \<in> space P" |   1336     fix a b assume "Pz b = 0 \<longrightarrow> Pyz (a, b) = 0" "0 \<le> Pz b" "a \<in> space T \<and> b \<in> space P" | 
|   1341       "(\<integral>\<^sup>+ x. ereal (Pxz (x, b)) \<partial>S) = ereal (Pz b)" "0 \<le> Pyz (a, b)"  |   1337       "(\<integral>\<^sup>+ x. ennreal (Pxz (x, b)) \<partial>S) = ennreal (Pz b)" | 
|   1342     then show "(\<integral>\<^sup>+ x. ereal (Pxz (x, b)) * ereal (Pyz (a, b) / Pz b) \<partial>S) = ereal (Pyz (a, b))" |   1338     then show "(\<integral>\<^sup>+ x. ennreal (Pxz (x, b)) * ennreal (Pyz (a, b) / Pz b) \<partial>S) = ennreal (Pyz (a, b))" | 
|   1343       by (subst nn_integral_multc) auto |   1339       using Pyz_nn[of "(a,b)"] | 
|         |   1340       by (subst nn_integral_multc) (auto simp: space_pair_measure ennreal_mult[symmetric]) | 
|   1344   qed |   1341   qed | 
|   1345   also have "\<dots> = 1" |   1342   also have "\<dots> = 1" | 
|   1346     using Q.emeasure_space_1 distributed_AE[OF Pyz] distributed_distr_eq_density[OF Pyz] |   1343     using Q.emeasure_space_1 Pyz_nn distributed_distr_eq_density[OF Pyz] | 
|   1347     by (subst nn_integral_density[symmetric]) auto |   1344     by (subst nn_integral_density[symmetric]) auto | 
|   1348   finally have le1: "(\<integral>\<^sup>+ x. ?f x \<partial>?P) \<le> 1" . |   1345   finally have le1: "(\<integral>\<^sup>+ x. ?f x \<partial>?P) \<le> 1" . | 
|   1349   also have "\<dots> < \<infinity>" by simp |   1346   also have "\<dots> < \<infinity>" by simp | 
|   1350   finally have fin: "(\<integral>\<^sup>+ x. ?f x \<partial>?P) \<noteq> \<infinity>" by simp |   1347   finally have fin: "(\<integral>\<^sup>+ x. ?f x \<partial>?P) \<noteq> \<infinity>" by simp | 
|   1351  |   1348  | 
|   1352   have pos: "(\<integral>\<^sup>+ x. ?f x \<partial>?P) \<noteq> 0" |   1349   have "(\<integral>\<^sup>+ x. ?f x \<partial>?P) \<noteq> 0" | 
|         |   1350     using Pxyz_nn | 
|   1353     apply (subst nn_integral_density) |   1351     apply (subst nn_integral_density) | 
|   1354     apply simp |   1352     apply (simp_all add: split_beta'  ennreal_mult'[symmetric] cong: nn_integral_cong) | 
|   1355     apply (rule distributed_AE[OF Pxyz]) |         | 
|   1356     apply simp |         | 
|   1357     apply (simp add: split_beta') |         | 
|   1358   proof |   1353   proof | 
|   1359     let ?g = "\<lambda>x. ereal (if Pxyz x = 0 then 0 else Pxz (fst x, snd (snd x)) * Pyz (snd x) / Pz (snd (snd x)))" |   1354     let ?g = "\<lambda>x. ennreal (if Pxyz x = 0 then 0 else Pxz (fst x, snd (snd x)) * Pyz (snd x) / Pz (snd (snd x)))" | 
|   1360     assume "(\<integral>\<^sup>+ x. ?g x \<partial>(S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P)) = 0" |   1355     assume "(\<integral>\<^sup>+ x. ?g x \<partial>(S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P)) = 0" | 
|   1361     then have "AE x in S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P. ?g x \<le> 0" |   1356     then have "AE x in S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P. ?g x = 0" | 
|   1362       by (intro nn_integral_0_iff_AE[THEN iffD1]) (auto intro!: borel_measurable_ereal measurable_If) |   1357       by (intro nn_integral_0_iff_AE[THEN iffD1]) auto | 
|   1363     then have "AE x in S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P. Pxyz x = 0" |   1358     then have "AE x in S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P. Pxyz x = 0" | 
|   1364       using ae1 ae2 ae3 ae4 ae5 ae6 ae7 ae8 Pxyz[THEN distributed_real_AE] |   1359       using ae1 ae2 ae3 ae4 AE_space | 
|   1365       by eventually_elim (auto split: if_split_asm simp: mult_le_0_iff divide_le_0_iff) |   1360       by eventually_elim | 
|   1366     then have "(\<integral>\<^sup>+ x. ereal (Pxyz x) \<partial>S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P) = 0" |   1361          (insert Px_nn Pz_nn Pxz_nn Pyz_nn, | 
|         |   1362            auto split: if_split_asm simp: mult_le_0_iff divide_le_0_iff space_pair_measure) | 
|         |   1363     then have "(\<integral>\<^sup>+ x. ennreal (Pxyz x) \<partial>S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P) = 0" | 
|   1367       by (subst nn_integral_cong_AE[of _ "\<lambda>x. 0"]) auto |   1364       by (subst nn_integral_cong_AE[of _ "\<lambda>x. 0"]) auto | 
|   1368     with P.emeasure_space_1 show False |   1365     with P.emeasure_space_1 show False | 
|   1369       by (subst (asm) emeasure_density) (auto cong: nn_integral_cong) |   1366       by (subst (asm) emeasure_density) (auto cong: nn_integral_cong) | 
|   1370   qed |   1367   qed | 
|         |   1368   then have pos: "0 < (\<integral>\<^sup>+ x. ?f x \<partial>?P)" | 
|         |   1369     by (simp add: zero_less_iff_neq_zero) | 
|   1371  |   1370  | 
|   1372   have neg: "(\<integral>\<^sup>+ x. - ?f x \<partial>?P) = 0" |   1371   have neg: "(\<integral>\<^sup>+ x. - ?f x \<partial>?P) = 0" | 
|   1373     apply (rule nn_integral_0_iff_AE[THEN iffD2]) |   1372     using Pz_nn Pxz_nn Pyz_nn Pxyz_nn | 
|   1374     apply (auto simp: split_beta') [] |   1373     by (intro nn_integral_0_iff_AE[THEN iffD2]) | 
|   1375     apply (subst AE_density) |   1374        (auto simp: split_beta' AE_density space_pair_measure intro!: AE_I2 ennreal_neg) | 
|   1376     apply (auto simp: split_beta') [] |         | 
|   1377     using ae5 ae6 ae7 ae8 |         | 
|   1378     apply eventually_elim |         | 
|   1379     apply auto |         | 
|   1380     done |         | 
|   1381  |   1375  | 
|   1382   have I3: "integrable (S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P) (\<lambda>(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Pxz (x, z) * (Pyz (y,z) / Pz z))))" |   1376   have I3: "integrable (S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P) (\<lambda>(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Pxz (x, z) * (Pyz (y,z) / Pz z))))" | 
|   1383     apply (rule integrable_cong_AE[THEN iffD1, OF _ _ _ integrable_diff[OF I1 I2]]) |   1377     apply (rule integrable_cong_AE[THEN iffD1, OF _ _ _ integrable_diff[OF I1 I2]]) | 
|   1384     using ae |   1378     using ae | 
|   1385     apply (auto simp: split_beta') |   1379     apply (auto simp: split_beta') | 
|   1489   let ?j = "\<lambda>x y z. Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Pxz (x, z) * (Pyz (y,z) / Pz z)))" |   1483   let ?j = "\<lambda>x y z. Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Pxz (x, z) * (Pyz (y,z) / Pz z)))" | 
|   1490   have "(\<lambda>(x, y, z). ?i x y z) = (\<lambda>x. if x \<in> (\<lambda>x. (X x, Y x, Z x)) ` space M then ?j (fst x) (fst (snd x)) (snd (snd x)) else 0)" |   1484   have "(\<lambda>(x, y, z). ?i x y z) = (\<lambda>x. if x \<in> (\<lambda>x. (X x, Y x, Z x)) ` space M then ?j (fst x) (fst (snd x)) (snd (snd x)) else 0)" | 
|   1491     by (auto intro!: ext) |   1485     by (auto intro!: ext) | 
|   1492   then show "(\<integral> (x, y, z). ?i x y z \<partial>?P) = (\<Sum>(x, y, z)\<in>(\<lambda>x. (X x, Y x, Z x)) ` space M. ?j x y z)" |   1486   then show "(\<integral> (x, y, z). ?i x y z \<partial>?P) = (\<Sum>(x, y, z)\<in>(\<lambda>x. (X x, Y x, Z x)) ` space M. ?j x y z)" | 
|   1493     by (auto intro!: setsum.cong simp add: \<open>?P = ?C\<close> lebesgue_integral_count_space_finite simple_distributed_finite setsum.If_cases split_beta') |   1487     by (auto intro!: setsum.cong simp add: \<open>?P = ?C\<close> lebesgue_integral_count_space_finite simple_distributed_finite setsum.If_cases split_beta') | 
|   1494 qed |   1488 qed (insert Pz Pyz Pxz Pxyz, auto intro: measure_nonneg) | 
|   1495  |   1489  | 
|   1496 lemma (in information_space) conditional_mutual_information_nonneg: |   1490 lemma (in information_space) conditional_mutual_information_nonneg: | 
|   1497   assumes X: "simple_function M X" and Y: "simple_function M Y" and Z: "simple_function M Z" |   1491   assumes X: "simple_function M X" and Y: "simple_function M Y" and Z: "simple_function M Z" | 
|   1498   shows "0 \<le> \<I>(X ; Y | Z)" |   1492   shows "0 \<le> \<I>(X ; Y | Z)" | 
|   1499 proof - |   1493 proof - | 
|   1500   have [simp]: "count_space (X ` space M) \<Otimes>\<^sub>M count_space (Y ` space M) \<Otimes>\<^sub>M count_space (Z ` space M) = |   1494   have [simp]: "count_space (X ` space M) \<Otimes>\<^sub>M count_space (Y ` space M) \<Otimes>\<^sub>M count_space (Z ` space M) = | 
|   1501       count_space (X`space M \<times> Y`space M \<times> Z`space M)" |   1495       count_space (X`space M \<times> Y`space M \<times> Z`space M)" | 
|   1502     by (simp add: pair_measure_count_space X Y Z simple_functionD) |   1496     by (simp add: pair_measure_count_space X Y Z simple_functionD) | 
|   1503   note sf = sigma_finite_measure_count_space_finite[OF simple_functionD(1)] |   1497   note sf = sigma_finite_measure_count_space_finite[OF simple_functionD(1)] | 
|   1504   note sd = simple_distributedI[OF _ refl] |   1498   note sd = simple_distributedI[OF _ _ refl] | 
|   1505   note sp = simple_function_Pair |   1499   note sp = simple_function_Pair | 
|   1506   show ?thesis |   1500   show ?thesis | 
|   1507    apply (rule conditional_mutual_information_generic_nonneg[OF sf[OF X] sf[OF Y] sf[OF Z]]) |   1501    apply (rule conditional_mutual_information_generic_nonneg[OF sf[OF X] sf[OF Y] sf[OF Z]]) | 
|   1508    apply (rule simple_distributed[OF sd[OF X]]) |   1502    apply (rule simple_distributed[OF sd[OF X]]) | 
|         |   1503    apply simp | 
|         |   1504    apply simp | 
|   1509    apply (rule simple_distributed[OF sd[OF Z]]) |   1505    apply (rule simple_distributed[OF sd[OF Z]]) | 
|         |   1506    apply simp | 
|         |   1507    apply simp | 
|   1510    apply (rule simple_distributed_joint[OF sd[OF sp[OF Y Z]]]) |   1508    apply (rule simple_distributed_joint[OF sd[OF sp[OF Y Z]]]) | 
|         |   1509    apply simp | 
|         |   1510    apply simp | 
|   1511    apply (rule simple_distributed_joint[OF sd[OF sp[OF X Z]]]) |   1511    apply (rule simple_distributed_joint[OF sd[OF sp[OF X Z]]]) | 
|         |   1512    apply simp | 
|         |   1513    apply simp | 
|   1512    apply (rule simple_distributed_joint2[OF sd[OF sp[OF X sp[OF Y Z]]]]) |   1514    apply (rule simple_distributed_joint2[OF sd[OF sp[OF X sp[OF Y Z]]]]) | 
|         |   1515    apply simp | 
|         |   1516    apply simp | 
|   1513    apply (auto intro!: integrable_count_space simp: X Y Z simple_functionD) |   1517    apply (auto intro!: integrable_count_space simp: X Y Z simple_functionD) | 
|   1514    done |   1518    done | 
|   1515 qed |   1519 qed | 
|   1516  |   1520  | 
|   1517 subsection \<open>Conditional Entropy\<close> |   1521 subsection \<open>Conditional Entropy\<close> | 
|   1518  |   1522  | 
|   1519 definition (in prob_space) |   1523 definition (in prob_space) | 
|   1520   "conditional_entropy b S T X Y = - (\<integral>(x, y). log b (real_of_ereal (RN_deriv (S \<Otimes>\<^sub>M T) (distr M (S \<Otimes>\<^sub>M T) (\<lambda>x. (X x, Y x))) (x, y)) /  |   1524   "conditional_entropy b S T X Y = - (\<integral>(x, y). log b (enn2real (RN_deriv (S \<Otimes>\<^sub>M T) (distr M (S \<Otimes>\<^sub>M T) (\<lambda>x. (X x, Y x))) (x, y)) / | 
|   1521     real_of_ereal (RN_deriv T (distr M T Y) y)) \<partial>distr M (S \<Otimes>\<^sub>M T) (\<lambda>x. (X x, Y x)))" |   1525     enn2real (RN_deriv T (distr M T Y) y)) \<partial>distr M (S \<Otimes>\<^sub>M T) (\<lambda>x. (X x, Y x)))" | 
|   1522  |   1526  | 
|   1523 abbreviation (in information_space) |   1527 abbreviation (in information_space) | 
|   1524   conditional_entropy_Pow ("\<H>'(_ | _')") where |   1528   conditional_entropy_Pow ("\<H>'(_ | _')") where | 
|   1525   "\<H>(X | Y) \<equiv> conditional_entropy b (count_space (X`space M)) (count_space (Y`space M)) X Y" |   1529   "\<H>(X | Y) \<equiv> conditional_entropy b (count_space (X`space M)) (count_space (Y`space M)) X Y" | 
|   1526  |   1530  | 
|   1527 lemma (in information_space) conditional_entropy_generic_eq: |   1531 lemma (in information_space) conditional_entropy_generic_eq: | 
|   1528   fixes Pxy :: "_ \<Rightarrow> real" and Py :: "'c \<Rightarrow> real" |   1532   fixes Pxy :: "_ \<Rightarrow> real" and Py :: "'c \<Rightarrow> real" | 
|   1529   assumes S: "sigma_finite_measure S" and T: "sigma_finite_measure T" |   1533   assumes S: "sigma_finite_measure S" and T: "sigma_finite_measure T" | 
|   1530   assumes Py[measurable]: "distributed M T Y Py" |   1534   assumes Py[measurable]: "distributed M T Y Py" and Py_nn[simp]: "\<And>x. x \<in> space T \<Longrightarrow> 0 \<le> Py x" | 
|   1531   assumes Pxy[measurable]: "distributed M (S \<Otimes>\<^sub>M T) (\<lambda>x. (X x, Y x)) Pxy" |   1535   assumes Pxy[measurable]: "distributed M (S \<Otimes>\<^sub>M T) (\<lambda>x. (X x, Y x)) Pxy" | 
|         |   1536     and Pxy_nn[simp]: "\<And>x y. x \<in> space S \<Longrightarrow> y \<in> space T \<Longrightarrow> 0 \<le> Pxy (x, y)" | 
|   1532   shows "conditional_entropy b S T X Y = - (\<integral>(x, y). Pxy (x, y) * log b (Pxy (x, y) / Py y) \<partial>(S \<Otimes>\<^sub>M T))" |   1537   shows "conditional_entropy b S T X Y = - (\<integral>(x, y). Pxy (x, y) * log b (Pxy (x, y) / Py y) \<partial>(S \<Otimes>\<^sub>M T))" | 
|   1533 proof - |   1538 proof - | 
|   1534   interpret S: sigma_finite_measure S by fact |   1539   interpret S: sigma_finite_measure S by fact | 
|   1535   interpret T: sigma_finite_measure T by fact |   1540   interpret T: sigma_finite_measure T by fact | 
|   1536   interpret ST: pair_sigma_finite S T .. |   1541   interpret ST: pair_sigma_finite S T .. | 
|   1537  |   1542  | 
|   1538   have "AE x in density (S \<Otimes>\<^sub>M T) (\<lambda>x. ereal (Pxy x)). Pxy x = real_of_ereal (RN_deriv (S \<Otimes>\<^sub>M T) (distr M (S \<Otimes>\<^sub>M T) (\<lambda>x. (X x, Y x))) x)" |   1543   have [measurable]: "Py \<in> T \<rightarrow>\<^sub>M borel" | 
|         |   1544     using Py Py_nn by (intro distributed_real_measurable) | 
|         |   1545   have measurable_Pxy[measurable]: "Pxy \<in> (S \<Otimes>\<^sub>M T) \<rightarrow>\<^sub>M borel" | 
|         |   1546     using Pxy Pxy_nn by (intro distributed_real_measurable) (auto simp: space_pair_measure) | 
|         |   1547  | 
|         |   1548   have "AE x in density (S \<Otimes>\<^sub>M T) (\<lambda>x. ennreal (Pxy x)). Pxy x = enn2real (RN_deriv (S \<Otimes>\<^sub>M T) (distr M (S \<Otimes>\<^sub>M T) (\<lambda>x. (X x, Y x))) x)" | 
|   1539     unfolding AE_density[OF distributed_borel_measurable, OF Pxy] |   1549     unfolding AE_density[OF distributed_borel_measurable, OF Pxy] | 
|   1540     unfolding distributed_distr_eq_density[OF Pxy] |   1550     unfolding distributed_distr_eq_density[OF Pxy] | 
|   1541     using distributed_RN_deriv[OF Pxy] |   1551     using distributed_RN_deriv[OF Pxy] | 
|   1542     by auto |   1552     by auto | 
|   1543   moreover |   1553   moreover | 
|   1544   have "AE x in density (S \<Otimes>\<^sub>M T) (\<lambda>x. ereal (Pxy x)). Py (snd x) = real_of_ereal (RN_deriv T (distr M T Y) (snd x))" |   1554   have "AE x in density (S \<Otimes>\<^sub>M T) (\<lambda>x. ennreal (Pxy x)). Py (snd x) = enn2real (RN_deriv T (distr M T Y) (snd x))" | 
|   1545     unfolding AE_density[OF distributed_borel_measurable, OF Pxy] |   1555     unfolding AE_density[OF distributed_borel_measurable, OF Pxy] | 
|   1546     unfolding distributed_distr_eq_density[OF Py] |   1556     unfolding distributed_distr_eq_density[OF Py] | 
|   1547     apply (rule ST.AE_pair_measure) |   1557     apply (rule ST.AE_pair_measure) | 
|   1548     apply auto |   1558     apply auto | 
|   1549     using distributed_RN_deriv[OF Py] |   1559     using distributed_RN_deriv[OF Py] | 
|   1550     apply auto |   1560     apply auto | 
|   1551     done     |   1561     done | 
|   1552   ultimately |   1562   ultimately | 
|   1553   have "conditional_entropy b S T X Y = - (\<integral>x. Pxy x * log b (Pxy x / Py (snd x)) \<partial>(S \<Otimes>\<^sub>M T))" |   1563   have "conditional_entropy b S T X Y = - (\<integral>x. Pxy x * log b (Pxy x / Py (snd x)) \<partial>(S \<Otimes>\<^sub>M T))" | 
|   1554     unfolding conditional_entropy_def neg_equal_iff_equal |   1564     unfolding conditional_entropy_def neg_equal_iff_equal | 
|   1555     apply (subst integral_real_density[symmetric]) |   1565     apply (subst integral_real_density[symmetric]) | 
|   1556     apply (auto simp: distributed_real_AE[OF Pxy] distributed_distr_eq_density[OF Pxy] |   1566     apply (auto simp: distributed_distr_eq_density[OF Pxy] space_pair_measure | 
|   1557                 intro!: integral_cong_AE) |   1567                 intro!: integral_cong_AE) | 
|   1558     done |   1568     done | 
|   1559   then show ?thesis by (simp add: split_beta') |   1569   then show ?thesis by (simp add: split_beta') | 
|   1560 qed |   1570 qed | 
|   1561  |   1571  | 
|   1562 lemma (in information_space) conditional_entropy_eq_entropy: |   1572 lemma (in information_space) conditional_entropy_eq_entropy: | 
|   1563   fixes Px :: "'b \<Rightarrow> real" and Py :: "'c \<Rightarrow> real" |   1573   fixes Px :: "'b \<Rightarrow> real" and Py :: "'c \<Rightarrow> real" | 
|   1564   assumes S: "sigma_finite_measure S" and T: "sigma_finite_measure T" |   1574   assumes S: "sigma_finite_measure S" and T: "sigma_finite_measure T" | 
|   1565   assumes Py[measurable]: "distributed M T Y Py" |   1575   assumes Py[measurable]: "distributed M T Y Py" | 
|         |   1576     and Py_nn[simp]: "\<And>x. x \<in> space T \<Longrightarrow> 0 \<le> Py x" | 
|   1566   assumes Pxy[measurable]: "distributed M (S \<Otimes>\<^sub>M T) (\<lambda>x. (X x, Y x)) Pxy" |   1577   assumes Pxy[measurable]: "distributed M (S \<Otimes>\<^sub>M T) (\<lambda>x. (X x, Y x)) Pxy" | 
|         |   1578     and Pxy_nn[simp]: "\<And>x y. x \<in> space S \<Longrightarrow> y \<in> space T \<Longrightarrow> 0 \<le> Pxy (x, y)" | 
|   1567   assumes I1: "integrable (S \<Otimes>\<^sub>M T) (\<lambda>x. Pxy x * log b (Pxy x))" |   1579   assumes I1: "integrable (S \<Otimes>\<^sub>M T) (\<lambda>x. Pxy x * log b (Pxy x))" | 
|   1568   assumes I2: "integrable (S \<Otimes>\<^sub>M T) (\<lambda>x. Pxy x * log b (Py (snd x)))" |   1580   assumes I2: "integrable (S \<Otimes>\<^sub>M T) (\<lambda>x. Pxy x * log b (Py (snd x)))" | 
|   1569   shows "conditional_entropy b S T X Y = entropy b (S \<Otimes>\<^sub>M T) (\<lambda>x. (X x, Y x)) - entropy b T Y" |   1581   shows "conditional_entropy b S T X Y = entropy b (S \<Otimes>\<^sub>M T) (\<lambda>x. (X x, Y x)) - entropy b T Y" | 
|   1570 proof - |   1582 proof - | 
|   1571   interpret S: sigma_finite_measure S by fact |   1583   interpret S: sigma_finite_measure S by fact | 
|   1572   interpret T: sigma_finite_measure T by fact |   1584   interpret T: sigma_finite_measure T by fact | 
|   1573   interpret ST: pair_sigma_finite S T .. |   1585   interpret ST: pair_sigma_finite S T .. | 
|   1574  |   1586  | 
|         |   1587   have [measurable]: "Py \<in> T \<rightarrow>\<^sub>M borel" | 
|         |   1588     using Py Py_nn by (intro distributed_real_measurable) | 
|         |   1589   have measurable_Pxy[measurable]: "Pxy \<in> (S \<Otimes>\<^sub>M T) \<rightarrow>\<^sub>M borel" | 
|         |   1590     using Pxy Pxy_nn by (intro distributed_real_measurable) (auto simp: space_pair_measure) | 
|         |   1591  | 
|   1575   have "entropy b T Y = - (\<integral>y. Py y * log b (Py y) \<partial>T)" |   1592   have "entropy b T Y = - (\<integral>y. Py y * log b (Py y) \<partial>T)" | 
|   1576     by (rule entropy_distr[OF Py]) |   1593     by (rule entropy_distr[OF Py Py_nn]) | 
|   1577   also have "\<dots> = - (\<integral>(x,y). Pxy (x,y) * log b (Py y) \<partial>(S \<Otimes>\<^sub>M T))" |   1594   also have "\<dots> = - (\<integral>(x,y). Pxy (x,y) * log b (Py y) \<partial>(S \<Otimes>\<^sub>M T))" | 
|   1578     using b_gt_1 Py[THEN distributed_real_measurable] |   1595     using b_gt_1 | 
|   1579     by (subst distributed_transform_integral[OF Pxy Py, where T=snd]) (auto intro!: integral_cong) |   1596     by (subst distributed_transform_integral[OF Pxy _ Py, where T=snd]) | 
|         |   1597        (auto intro!: integral_cong simp: space_pair_measure) | 
|   1580   finally have e_eq: "entropy b T Y = - (\<integral>(x,y). Pxy (x,y) * log b (Py y) \<partial>(S \<Otimes>\<^sub>M T))" . |   1598   finally have e_eq: "entropy b T Y = - (\<integral>(x,y). Pxy (x,y) * log b (Py y) \<partial>(S \<Otimes>\<^sub>M T))" . | 
|   1581  |   1599  | 
|         |   1600   have **: "\<And>x. x \<in> space (S \<Otimes>\<^sub>M T) \<Longrightarrow> 0 \<le> Pxy x" | 
|         |   1601     by (auto simp: space_pair_measure) | 
|         |   1602  | 
|   1582   have ae2: "AE x in S \<Otimes>\<^sub>M T. Py (snd x) = 0 \<longrightarrow> Pxy x = 0" |   1603   have ae2: "AE x in S \<Otimes>\<^sub>M T. Py (snd x) = 0 \<longrightarrow> Pxy x = 0" | 
|   1583     by (intro subdensity_real[of snd, OF _ Pxy Py]) (auto intro: measurable_Pair) |   1604     by (intro subdensity_real[of snd, OF _ Pxy Py]) | 
|         |   1605        (auto intro: measurable_Pair simp: space_pair_measure) | 
|   1584   moreover have ae4: "AE x in S \<Otimes>\<^sub>M T. 0 \<le> Py (snd x)" |   1606   moreover have ae4: "AE x in S \<Otimes>\<^sub>M T. 0 \<le> Py (snd x)" | 
|   1585     using Py by (intro ST.AE_pair_measure) (auto simp: comp_def intro!: measurable_snd'' dest: distributed_real_AE distributed_real_measurable) |   1607     using Py by (intro ST.AE_pair_measure) (auto simp: comp_def intro!: measurable_snd'') | 
|   1586   moreover note ae5 = Pxy[THEN distributed_real_AE] |         | 
|   1587   ultimately have "AE x in S \<Otimes>\<^sub>M T. 0 \<le> Pxy x \<and> 0 \<le> Py (snd x) \<and> |   1608   ultimately have "AE x in S \<Otimes>\<^sub>M T. 0 \<le> Pxy x \<and> 0 \<le> Py (snd x) \<and> | 
|   1588     (Pxy x = 0 \<or> (Pxy x \<noteq> 0 \<longrightarrow> 0 < Pxy x \<and> 0 < Py (snd x)))" |   1609     (Pxy x = 0 \<or> (Pxy x \<noteq> 0 \<longrightarrow> 0 < Pxy x \<and> 0 < Py (snd x)))" | 
|   1589     by eventually_elim auto |   1610     using AE_space by eventually_elim (auto simp: space_pair_measure less_le) | 
|   1590   then have ae: "AE x in S \<Otimes>\<^sub>M T. |   1611   then have ae: "AE x in S \<Otimes>\<^sub>M T. | 
|   1591      Pxy x * log b (Pxy x) - Pxy x * log b (Py (snd x)) = Pxy x * log b (Pxy x / Py (snd x))" |   1612      Pxy x * log b (Pxy x) - Pxy x * log b (Py (snd x)) = Pxy x * log b (Pxy x / Py (snd x))" | 
|   1592     by eventually_elim (auto simp: log_simps field_simps b_gt_1) |   1613     by eventually_elim (auto simp: log_simps field_simps b_gt_1) | 
|   1593   have "conditional_entropy b S T X Y =  |   1614   have "conditional_entropy b S T X Y = | 
|   1594     - (\<integral>x. Pxy x * log b (Pxy x) - Pxy x * log b (Py (snd x)) \<partial>(S \<Otimes>\<^sub>M T))" |   1615     - (\<integral>x. Pxy x * log b (Pxy x) - Pxy x * log b (Py (snd x)) \<partial>(S \<Otimes>\<^sub>M T))" | 
|   1595     unfolding conditional_entropy_generic_eq[OF S T Py Pxy] neg_equal_iff_equal |   1616     unfolding conditional_entropy_generic_eq[OF S T Py Py_nn Pxy Pxy_nn, simplified] neg_equal_iff_equal | 
|   1596     apply (intro integral_cong_AE) |   1617     apply (intro integral_cong_AE) | 
|   1597     using ae |   1618     using ae | 
|   1598     apply auto |   1619     apply auto | 
|   1599     done |   1620     done | 
|   1600   also have "\<dots> = - (\<integral>x. Pxy x * log b (Pxy x) \<partial>(S \<Otimes>\<^sub>M T)) - - (\<integral>x.  Pxy x * log b (Py (snd x)) \<partial>(S \<Otimes>\<^sub>M T))" |   1621   also have "\<dots> = - (\<integral>x. Pxy x * log b (Pxy x) \<partial>(S \<Otimes>\<^sub>M T)) - - (\<integral>x.  Pxy x * log b (Py (snd x)) \<partial>(S \<Otimes>\<^sub>M T))" | 
|   1601     by (simp add: integral_diff[OF I1 I2]) |   1622     by (simp add: integral_diff[OF I1 I2]) | 
|   1602   finally show ?thesis  |   1623   finally show ?thesis | 
|   1603     unfolding conditional_entropy_generic_eq[OF S T Py Pxy] entropy_distr[OF Pxy] e_eq |   1624     using conditional_entropy_generic_eq[OF S T Py Py_nn Pxy Pxy_nn, simplified] | 
|         |   1625       entropy_distr[OF Pxy **, simplified] e_eq | 
|   1604     by (simp add: split_beta') |   1626     by (simp add: split_beta') | 
|   1605 qed |   1627 qed | 
|   1606  |   1628  | 
|   1607 lemma (in information_space) conditional_entropy_eq_entropy_simple: |   1629 lemma (in information_space) conditional_entropy_eq_entropy_simple: | 
|   1608   assumes X: "simple_function M X" and Y: "simple_function M Y" |   1630   assumes X: "simple_function M X" and Y: "simple_function M Y" | 
|   1688 subsection \<open>Equalities\<close> |   1713 subsection \<open>Equalities\<close> | 
|   1689  |   1714  | 
|   1690 lemma (in information_space) mutual_information_eq_entropy_conditional_entropy_distr: |   1715 lemma (in information_space) mutual_information_eq_entropy_conditional_entropy_distr: | 
|   1691   fixes Px :: "'b \<Rightarrow> real" and Py :: "'c \<Rightarrow> real" and Pxy :: "('b \<times> 'c) \<Rightarrow> real" |   1716   fixes Px :: "'b \<Rightarrow> real" and Py :: "'c \<Rightarrow> real" and Pxy :: "('b \<times> 'c) \<Rightarrow> real" | 
|   1692   assumes S: "sigma_finite_measure S" and T: "sigma_finite_measure T" |   1717   assumes S: "sigma_finite_measure S" and T: "sigma_finite_measure T" | 
|   1693   assumes Px[measurable]: "distributed M S X Px" and Py[measurable]: "distributed M T Y Py" |   1718   assumes Px[measurable]: "distributed M S X Px" | 
|   1694   assumes Pxy[measurable]: "distributed M (S \<Otimes>\<^sub>M T) (\<lambda>x. (X x, Y x)) Pxy" |   1719     and Px_nn[simp]: "\<And>x. x \<in> space S \<Longrightarrow> 0 \<le> Px x" | 
|         |   1720     and Py[measurable]: "distributed M T Y Py" | 
|         |   1721     and Py_nn[simp]: "\<And>x. x \<in> space T \<Longrightarrow> 0 \<le> Py x" | 
|         |   1722     and Pxy[measurable]: "distributed M (S \<Otimes>\<^sub>M T) (\<lambda>x. (X x, Y x)) Pxy" | 
|         |   1723     and Pxy_nn[simp]: "\<And>x y. x \<in> space S \<Longrightarrow> y \<in> space T \<Longrightarrow> 0 \<le> Pxy (x, y)" | 
|   1695   assumes Ix: "integrable(S \<Otimes>\<^sub>M T) (\<lambda>x. Pxy x * log b (Px (fst x)))" |   1724   assumes Ix: "integrable(S \<Otimes>\<^sub>M T) (\<lambda>x. Pxy x * log b (Px (fst x)))" | 
|   1696   assumes Iy: "integrable(S \<Otimes>\<^sub>M T) (\<lambda>x. Pxy x * log b (Py (snd x)))" |   1725   assumes Iy: "integrable(S \<Otimes>\<^sub>M T) (\<lambda>x. Pxy x * log b (Py (snd x)))" | 
|   1697   assumes Ixy: "integrable(S \<Otimes>\<^sub>M T) (\<lambda>x. Pxy x * log b (Pxy x))" |   1726   assumes Ixy: "integrable(S \<Otimes>\<^sub>M T) (\<lambda>x. Pxy x * log b (Pxy x))" | 
|   1698   shows  "mutual_information b S T X Y = entropy b S X + entropy b T Y - entropy b (S \<Otimes>\<^sub>M T) (\<lambda>x. (X x, Y x))" |   1727   shows  "mutual_information b S T X Y = entropy b S X + entropy b T Y - entropy b (S \<Otimes>\<^sub>M T) (\<lambda>x. (X x, Y x))" | 
|   1699 proof - |   1728 proof - | 
|         |   1729   have [measurable]: "Px \<in> S \<rightarrow>\<^sub>M borel" | 
|         |   1730     using Px Px_nn by (intro distributed_real_measurable) | 
|         |   1731   have [measurable]: "Py \<in> T \<rightarrow>\<^sub>M borel" | 
|         |   1732     using Py Py_nn by (intro distributed_real_measurable) | 
|         |   1733   have measurable_Pxy[measurable]: "Pxy \<in> (S \<Otimes>\<^sub>M T) \<rightarrow>\<^sub>M borel" | 
|         |   1734     using Pxy Pxy_nn by (intro distributed_real_measurable) (auto simp: space_pair_measure) | 
|         |   1735  | 
|   1700   have X: "entropy b S X = - (\<integral>x. Pxy x * log b (Px (fst x)) \<partial>(S \<Otimes>\<^sub>M T))" |   1736   have X: "entropy b S X = - (\<integral>x. Pxy x * log b (Px (fst x)) \<partial>(S \<Otimes>\<^sub>M T))" | 
|   1701     using b_gt_1 Px[THEN distributed_real_measurable] |   1737     using b_gt_1 | 
|   1702     apply (subst entropy_distr[OF Px]) |   1738     apply (subst entropy_distr[OF Px Px_nn], simp) | 
|   1703     apply (subst distributed_transform_integral[OF Pxy Px, where T=fst]) |   1739     apply (subst distributed_transform_integral[OF Pxy _ Px, where T=fst]) | 
|   1704     apply (auto intro!: integral_cong) |   1740     apply (auto intro!: integral_cong simp: space_pair_measure) | 
|   1705     done |   1741     done | 
|   1706  |   1742  | 
|   1707   have Y: "entropy b T Y = - (\<integral>x. Pxy x * log b (Py (snd x)) \<partial>(S \<Otimes>\<^sub>M T))" |   1743   have Y: "entropy b T Y = - (\<integral>x. Pxy x * log b (Py (snd x)) \<partial>(S \<Otimes>\<^sub>M T))" | 
|   1708     using b_gt_1 Py[THEN distributed_real_measurable] |   1744     using b_gt_1 | 
|   1709     apply (subst entropy_distr[OF Py]) |   1745     apply (subst entropy_distr[OF Py Py_nn], simp) | 
|   1710     apply (subst distributed_transform_integral[OF Pxy Py, where T=snd]) |   1746     apply (subst distributed_transform_integral[OF Pxy _ Py, where T=snd]) | 
|   1711     apply (auto intro!: integral_cong) |   1747     apply (auto intro!: integral_cong simp: space_pair_measure) | 
|   1712     done |   1748     done | 
|   1713  |   1749  | 
|   1714   interpret S: sigma_finite_measure S by fact |   1750   interpret S: sigma_finite_measure S by fact | 
|   1715   interpret T: sigma_finite_measure T by fact |   1751   interpret T: sigma_finite_measure T by fact | 
|   1716   interpret ST: pair_sigma_finite S T .. |   1752   interpret ST: pair_sigma_finite S T .. | 
|   1717   have ST: "sigma_finite_measure (S \<Otimes>\<^sub>M T)" .. |   1753   have ST: "sigma_finite_measure (S \<Otimes>\<^sub>M T)" .. | 
|   1718  |   1754  | 
|   1719   have XY: "entropy b (S \<Otimes>\<^sub>M T) (\<lambda>x. (X x, Y x)) = - (\<integral>x. Pxy x * log b (Pxy x) \<partial>(S \<Otimes>\<^sub>M T))" |   1755   have XY: "entropy b (S \<Otimes>\<^sub>M T) (\<lambda>x. (X x, Y x)) = - (\<integral>x. Pxy x * log b (Pxy x) \<partial>(S \<Otimes>\<^sub>M T))" | 
|   1720     by (subst entropy_distr[OF Pxy]) (auto intro!: integral_cong) |   1756     by (subst entropy_distr[OF Pxy]) (auto intro!: integral_cong simp: space_pair_measure) | 
|   1721    |   1757  | 
|   1722   have "AE x in S \<Otimes>\<^sub>M T. Px (fst x) = 0 \<longrightarrow> Pxy x = 0" |   1758   have "AE x in S \<Otimes>\<^sub>M T. Px (fst x) = 0 \<longrightarrow> Pxy x = 0" | 
|   1723     by (intro subdensity_real[of fst, OF _ Pxy Px]) (auto intro: measurable_Pair) |   1759     by (intro subdensity_real[of fst, OF _ Pxy Px]) (auto intro: measurable_Pair simp: space_pair_measure) | 
|   1724   moreover have "AE x in S \<Otimes>\<^sub>M T. Py (snd x) = 0 \<longrightarrow> Pxy x = 0" |   1760   moreover have "AE x in S \<Otimes>\<^sub>M T. Py (snd x) = 0 \<longrightarrow> Pxy x = 0" | 
|   1725     by (intro subdensity_real[of snd, OF _ Pxy Py]) (auto intro: measurable_Pair) |   1761     by (intro subdensity_real[of snd, OF _ Pxy Py]) (auto intro: measurable_Pair simp: space_pair_measure) | 
|   1726   moreover have "AE x in S \<Otimes>\<^sub>M T. 0 \<le> Px (fst x)" |   1762   moreover have "AE x in S \<Otimes>\<^sub>M T. 0 \<le> Px (fst x)" | 
|   1727     using Px by (intro ST.AE_pair_measure) (auto simp: comp_def intro!: measurable_fst'' dest: distributed_real_AE distributed_real_measurable) |   1763     using Px by (intro ST.AE_pair_measure) (auto simp: comp_def intro!: measurable_fst'') | 
|   1728   moreover have "AE x in S \<Otimes>\<^sub>M T. 0 \<le> Py (snd x)" |   1764   moreover have "AE x in S \<Otimes>\<^sub>M T. 0 \<le> Py (snd x)" | 
|   1729     using Py by (intro ST.AE_pair_measure) (auto simp: comp_def intro!: measurable_snd'' dest: distributed_real_AE distributed_real_measurable) |   1765     using Py by (intro ST.AE_pair_measure) (auto simp: comp_def intro!: measurable_snd'') | 
|   1730   moreover note Pxy[THEN distributed_real_AE] |   1766   ultimately have "AE x in S \<Otimes>\<^sub>M T. Pxy x * log b (Pxy x) - Pxy x * log b (Px (fst x)) - Pxy x * log b (Py (snd x)) = | 
|   1731   ultimately have "AE x in S \<Otimes>\<^sub>M T. Pxy x * log b (Pxy x) - Pxy x * log b (Px (fst x)) - Pxy x * log b (Py (snd x)) =  |         | 
|   1732     Pxy x * log b (Pxy x / (Px (fst x) * Py (snd x)))" |   1767     Pxy x * log b (Pxy x / (Px (fst x) * Py (snd x)))" | 
|   1733     (is "AE x in _. ?f x = ?g x") |   1768     (is "AE x in _. ?f x = ?g x") | 
|         |   1769     using AE_space | 
|   1734   proof eventually_elim |   1770   proof eventually_elim | 
|   1735     case (elim x) |   1771     case (elim x) | 
|   1736     show ?case |   1772     show ?case | 
|   1737     proof cases |   1773     proof cases | 
|   1738       assume "Pxy x \<noteq> 0" |   1774       assume "Pxy x \<noteq> 0" | 
|   1739       with elim have "0 < Px (fst x)" "0 < Py (snd x)" "0 < Pxy x" |   1775       with elim have "0 < Px (fst x)" "0 < Py (snd x)" "0 < Pxy x" | 
|   1740         by auto |   1776         by (auto simp: space_pair_measure less_le) | 
|   1741       then show ?thesis |   1777       then show ?thesis | 
|   1742         using b_gt_1 by (simp add: log_simps less_imp_le field_simps) |   1778         using b_gt_1 by (simp add: log_simps less_imp_le field_simps) | 
|   1743     qed simp |   1779     qed simp | 
|   1744   qed |   1780   qed | 
|   1745  |   1781  | 
|   1752     apply (simp add: field_simps) |   1788     apply (simp add: field_simps) | 
|   1753     done |   1789     done | 
|   1754   also have "\<dots> = integral\<^sup>L (S \<Otimes>\<^sub>M T) ?g" |   1790   also have "\<dots> = integral\<^sup>L (S \<Otimes>\<^sub>M T) ?g" | 
|   1755     using \<open>AE x in _. ?f x = ?g x\<close> by (intro integral_cong_AE) auto |   1791     using \<open>AE x in _. ?f x = ?g x\<close> by (intro integral_cong_AE) auto | 
|   1756   also have "\<dots> = mutual_information b S T X Y" |   1792   also have "\<dots> = mutual_information b S T X Y" | 
|   1757     by (rule mutual_information_distr[OF S T Px Py Pxy, symmetric]) |   1793     by (rule mutual_information_distr[OF S T Px _ Py _ Pxy _ , symmetric]) | 
|         |   1794        (auto simp: space_pair_measure) | 
|   1758   finally show ?thesis .. |   1795   finally show ?thesis .. | 
|   1759 qed |   1796 qed | 
|   1760  |   1797  | 
|   1761 lemma (in information_space) mutual_information_eq_entropy_conditional_entropy': |   1798 lemma (in information_space) mutual_information_eq_entropy_conditional_entropy': | 
|   1762   fixes Px :: "'b \<Rightarrow> real" and Py :: "'c \<Rightarrow> real" and Pxy :: "('b \<times> 'c) \<Rightarrow> real" |   1799   fixes Px :: "'b \<Rightarrow> real" and Py :: "'c \<Rightarrow> real" and Pxy :: "('b \<times> 'c) \<Rightarrow> real" | 
|   1763   assumes S: "sigma_finite_measure S" and T: "sigma_finite_measure T" |   1800   assumes S: "sigma_finite_measure S" and T: "sigma_finite_measure T" | 
|   1764   assumes Px: "distributed M S X Px" and Py: "distributed M T Y Py" |   1801   assumes Px: "distributed M S X Px" "\<And>x. x \<in> space S \<Longrightarrow> 0 \<le> Px x" | 
|         |   1802     and Py: "distributed M T Y Py" "\<And>x. x \<in> space T \<Longrightarrow> 0 \<le> Py x" | 
|   1765   assumes Pxy: "distributed M (S \<Otimes>\<^sub>M T) (\<lambda>x. (X x, Y x)) Pxy" |   1803   assumes Pxy: "distributed M (S \<Otimes>\<^sub>M T) (\<lambda>x. (X x, Y x)) Pxy" | 
|         |   1804     "\<And>x. x \<in> space (S \<Otimes>\<^sub>M T) \<Longrightarrow> 0 \<le> Pxy x" | 
|   1766   assumes Ix: "integrable(S \<Otimes>\<^sub>M T) (\<lambda>x. Pxy x * log b (Px (fst x)))" |   1805   assumes Ix: "integrable(S \<Otimes>\<^sub>M T) (\<lambda>x. Pxy x * log b (Px (fst x)))" | 
|   1767   assumes Iy: "integrable(S \<Otimes>\<^sub>M T) (\<lambda>x. Pxy x * log b (Py (snd x)))" |   1806   assumes Iy: "integrable(S \<Otimes>\<^sub>M T) (\<lambda>x. Pxy x * log b (Py (snd x)))" | 
|   1768   assumes Ixy: "integrable(S \<Otimes>\<^sub>M T) (\<lambda>x. Pxy x * log b (Pxy x))" |   1807   assumes Ixy: "integrable(S \<Otimes>\<^sub>M T) (\<lambda>x. Pxy x * log b (Pxy x))" | 
|   1769   shows  "mutual_information b S T X Y = entropy b S X - conditional_entropy b S T X Y" |   1808   shows  "mutual_information b S T X Y = entropy b S X - conditional_entropy b S T X Y" | 
|   1770   using |   1809   using | 
|   1771     mutual_information_eq_entropy_conditional_entropy_distr[OF S T Px Py Pxy Ix Iy Ixy] |   1810     mutual_information_eq_entropy_conditional_entropy_distr[OF S T Px Py Pxy Ix Iy Ixy] | 
|   1772     conditional_entropy_eq_entropy[OF S T Py Pxy Ixy Iy] |   1811     conditional_entropy_eq_entropy[OF S T Py Pxy Ixy Iy] | 
|   1773   by simp |   1812   by (simp add: space_pair_measure) | 
|   1774  |   1813  | 
|   1775 lemma (in information_space) mutual_information_eq_entropy_conditional_entropy: |   1814 lemma (in information_space) mutual_information_eq_entropy_conditional_entropy: | 
|   1776   assumes sf_X: "simple_function M X" and sf_Y: "simple_function M Y" |   1815   assumes sf_X: "simple_function M X" and sf_Y: "simple_function M Y" | 
|   1777   shows  "\<I>(X ; Y) = \<H>(X) - \<H>(X | Y)" |   1816   shows  "\<I>(X ; Y) = \<H>(X) - \<H>(X | Y)" | 
|   1778 proof - |   1817 proof - | 
|   1779   have X: "simple_distributed M X (\<lambda>x. measure M (X -` {x} \<inter> space M))" |   1818   have X: "simple_distributed M X (\<lambda>x. measure M (X -` {x} \<inter> space M))" | 
|   1780     using sf_X by (rule simple_distributedI) auto |   1819     using sf_X by (rule simple_distributedI) (auto simp: measure_nonneg) | 
|   1781   have Y: "simple_distributed M Y (\<lambda>x. measure M (Y -` {x} \<inter> space M))" |   1820   have Y: "simple_distributed M Y (\<lambda>x. measure M (Y -` {x} \<inter> space M))" | 
|   1782     using sf_Y by (rule simple_distributedI) auto |   1821     using sf_Y by (rule simple_distributedI) (auto simp: measure_nonneg) | 
|   1783   have sf_XY: "simple_function M (\<lambda>x. (X x, Y x))" |   1822   have sf_XY: "simple_function M (\<lambda>x. (X x, Y x))" | 
|   1784     using sf_X sf_Y by (rule simple_function_Pair) |   1823     using sf_X sf_Y by (rule simple_function_Pair) | 
|   1785   then have XY: "simple_distributed M (\<lambda>x. (X x, Y x)) (\<lambda>x. measure M ((\<lambda>x. (X x, Y x)) -` {x} \<inter> space M))" |   1824   then have XY: "simple_distributed M (\<lambda>x. (X x, Y x)) (\<lambda>x. measure M ((\<lambda>x. (X x, Y x)) -` {x} \<inter> space M))" | 
|   1786     by (rule simple_distributedI) auto |   1825     by (rule simple_distributedI) (auto simp: measure_nonneg) | 
|   1787   from simple_distributed_joint_finite[OF this, simp] |   1826   from simple_distributed_joint_finite[OF this, simp] | 
|   1788   have eq: "count_space (X ` space M) \<Otimes>\<^sub>M count_space (Y ` space M) = count_space (X ` space M \<times> Y ` space M)" |   1827   have eq: "count_space (X ` space M) \<Otimes>\<^sub>M count_space (Y ` space M) = count_space (X ` space M \<times> Y ` space M)" | 
|   1789     by (simp add: pair_measure_count_space) |   1828     by (simp add: pair_measure_count_space) | 
|   1790  |   1829  | 
|   1791   have "\<I>(X ; Y) = \<H>(X) + \<H>(Y) - entropy b (count_space (X`space M) \<Otimes>\<^sub>M count_space (Y`space M)) (\<lambda>x. (X x, Y x))" |   1830   have "\<I>(X ; Y) = \<H>(X) + \<H>(Y) - entropy b (count_space (X`space M) \<Otimes>\<^sub>M count_space (Y`space M)) (\<lambda>x. (X x, Y x))" | 
|   1792     using sigma_finite_measure_count_space_finite sigma_finite_measure_count_space_finite simple_distributed[OF X] simple_distributed[OF Y] simple_distributed_joint[OF XY] |   1831     using sigma_finite_measure_count_space_finite | 
|   1793     by (rule mutual_information_eq_entropy_conditional_entropy_distr) (auto simp: eq integrable_count_space) |   1832       sigma_finite_measure_count_space_finite | 
|         |   1833       simple_distributed[OF X] measure_nonneg simple_distributed[OF Y] measure_nonneg simple_distributed_joint[OF XY] | 
|         |   1834     by (rule mutual_information_eq_entropy_conditional_entropy_distr) | 
|         |   1835        (auto simp: eq integrable_count_space measure_nonneg) | 
|   1794   then show ?thesis |   1836   then show ?thesis | 
|   1795     unfolding conditional_entropy_eq_entropy_simple[OF sf_X sf_Y] by simp |   1837     unfolding conditional_entropy_eq_entropy_simple[OF sf_X sf_Y] by simp | 
|   1796 qed |   1838 qed | 
|   1797  |   1839  | 
|   1798 lemma (in information_space) mutual_information_nonneg_simple: |   1840 lemma (in information_space) mutual_information_nonneg_simple: | 
|   1799   assumes sf_X: "simple_function M X" and sf_Y: "simple_function M Y" |   1841   assumes sf_X: "simple_function M X" and sf_Y: "simple_function M Y" | 
|   1800   shows  "0 \<le> \<I>(X ; Y)" |   1842   shows  "0 \<le> \<I>(X ; Y)" | 
|   1801 proof - |   1843 proof - | 
|   1802   have X: "simple_distributed M X (\<lambda>x. measure M (X -` {x} \<inter> space M))" |   1844   have X: "simple_distributed M X (\<lambda>x. measure M (X -` {x} \<inter> space M))" | 
|   1803     using sf_X by (rule simple_distributedI) auto |   1845     using sf_X by (rule simple_distributedI) (auto simp: measure_nonneg) | 
|   1804   have Y: "simple_distributed M Y (\<lambda>x. measure M (Y -` {x} \<inter> space M))" |   1846   have Y: "simple_distributed M Y (\<lambda>x. measure M (Y -` {x} \<inter> space M))" | 
|   1805     using sf_Y by (rule simple_distributedI) auto |   1847     using sf_Y by (rule simple_distributedI) (auto simp: measure_nonneg) | 
|   1806  |   1848  | 
|   1807   have sf_XY: "simple_function M (\<lambda>x. (X x, Y x))" |   1849   have sf_XY: "simple_function M (\<lambda>x. (X x, Y x))" | 
|   1808     using sf_X sf_Y by (rule simple_function_Pair) |   1850     using sf_X sf_Y by (rule simple_function_Pair) | 
|   1809   then have XY: "simple_distributed M (\<lambda>x. (X x, Y x)) (\<lambda>x. measure M ((\<lambda>x. (X x, Y x)) -` {x} \<inter> space M))" |   1851   then have XY: "simple_distributed M (\<lambda>x. (X x, Y x)) (\<lambda>x. measure M ((\<lambda>x. (X x, Y x)) -` {x} \<inter> space M))" | 
|   1810     by (rule simple_distributedI) auto |   1852     by (rule simple_distributedI) (auto simp: measure_nonneg) | 
|   1811  |   1853  | 
|   1812   from simple_distributed_joint_finite[OF this, simp] |   1854   from simple_distributed_joint_finite[OF this, simp] | 
|   1813   have eq: "count_space (X ` space M) \<Otimes>\<^sub>M count_space (Y ` space M) = count_space (X ` space M \<times> Y ` space M)" |   1855   have eq: "count_space (X ` space M) \<Otimes>\<^sub>M count_space (Y ` space M) = count_space (X ` space M \<times> Y ` space M)" | 
|   1814     by (simp add: pair_measure_count_space) |   1856     by (simp add: pair_measure_count_space) | 
|   1815  |   1857  | 
|   1816   show ?thesis |   1858   show ?thesis | 
|   1817     by (rule mutual_information_nonneg[OF _ _ simple_distributed[OF X] simple_distributed[OF Y] simple_distributed_joint[OF XY]]) |   1859     by (rule mutual_information_nonneg[OF _ _ simple_distributed[OF X] _ simple_distributed[OF Y] _ simple_distributed_joint[OF XY]]) | 
|   1818        (simp_all add: eq integrable_count_space sigma_finite_measure_count_space_finite) |   1860        (simp_all add: eq integrable_count_space sigma_finite_measure_count_space_finite measure_nonneg) | 
|   1819 qed |   1861 qed | 
|   1820  |   1862  | 
|   1821 lemma (in information_space) conditional_entropy_less_eq_entropy: |   1863 lemma (in information_space) conditional_entropy_less_eq_entropy: | 
|   1822   assumes X: "simple_function M X" and Z: "simple_function M Z" |   1864   assumes X: "simple_function M X" and Z: "simple_function M Z" | 
|   1823   shows "\<H>(X | Z) \<le> \<H>(X)" |   1865   shows "\<H>(X | Z) \<le> \<H>(X)" |