src/HOL/Probability/Information.thy
changeset 50003 8c213922ed49
parent 50002 ce0d316b5b44
child 50244 de72bbe42190
equal deleted inserted replaced
50002:ce0d316b5b44 50003:8c213922ed49
    82 lemma (in information_space) measurable_entropy_density:
    82 lemma (in information_space) measurable_entropy_density:
    83   assumes ac: "absolutely_continuous M N" "sets N = events"
    83   assumes ac: "absolutely_continuous M N" "sets N = events"
    84   shows "entropy_density b M N \<in> borel_measurable M"
    84   shows "entropy_density b M N \<in> borel_measurable M"
    85 proof -
    85 proof -
    86   from borel_measurable_RN_deriv[OF ac] b_gt_1 show ?thesis
    86   from borel_measurable_RN_deriv[OF ac] b_gt_1 show ?thesis
    87     unfolding entropy_density_def
    87     unfolding entropy_density_def by auto
    88     by (intro measurable_comp) auto
    88 qed
    89 qed
    89 
       
    90 lemma borel_measurable_RN_deriv_density[measurable (raw)]:
       
    91   "f \<in> borel_measurable M \<Longrightarrow> RN_deriv M (density M f) \<in> borel_measurable M"
       
    92   using borel_measurable_RN_deriv_density[of "\<lambda>x. max 0 (f x )" M]
       
    93   by (simp add: density_max_0[symmetric])
    90 
    94 
    91 lemma (in sigma_finite_measure) KL_density:
    95 lemma (in sigma_finite_measure) KL_density:
    92   fixes f :: "'a \<Rightarrow> real"
    96   fixes f :: "'a \<Rightarrow> real"
    93   assumes "1 < b"
    97   assumes "1 < b"
    94   assumes f: "f \<in> borel_measurable M" "AE x in M. 0 \<le> f x"
    98   assumes f: "f \<in> borel_measurable M" "AE x in M. 0 \<le> f x"
    95   shows "KL_divergence b M (density M f) = (\<integral>x. f x * log b (f x) \<partial>M)"
    99   shows "KL_divergence b M (density M f) = (\<integral>x. f x * log b (f x) \<partial>M)"
    96   unfolding KL_divergence_def
   100   unfolding KL_divergence_def
    97 proof (subst integral_density)
   101 proof (subst integral_density)
    98   show "entropy_density b M (density M (\<lambda>x. ereal (f x))) \<in> borel_measurable M"
   102   show "entropy_density b M (density M (\<lambda>x. ereal (f x))) \<in> borel_measurable M"
    99     using f
   103     using f
   100     by (auto simp: comp_def entropy_density_def intro!: borel_measurable_log borel_measurable_RN_deriv_density)
   104     by (auto simp: comp_def entropy_density_def)
   101   have "density M (RN_deriv M (density M f)) = density M f"
   105   have "density M (RN_deriv M (density M f)) = density M f"
   102     using f by (intro density_RN_deriv_density) auto
   106     using f by (intro density_RN_deriv_density) auto
   103   then have eq: "AE x in M. RN_deriv M (density M f) x = f x"
   107   then have eq: "AE x in M. RN_deriv M (density M f) x = f x"
   104     using f
   108     using f
   105     by (intro density_unique)
   109     by (intro density_unique)
   364   assumes T: "T \<in> measurable P Q" "absolutely_continuous Q (distr P Q T)"
   368   assumes T: "T \<in> measurable P Q" "absolutely_continuous Q (distr P Q T)"
   365   assumes g: "distributed M Q Y g"
   369   assumes g: "distributed M Q Y g"
   366   shows "AE x in P. 0 \<le> g (T x)"
   370   shows "AE x in P. 0 \<le> g (T x)"
   367   using g
   371   using g
   368   apply (subst AE_distr_iff[symmetric, OF T(1)])
   372   apply (subst AE_distr_iff[symmetric, OF T(1)])
   369   apply (simp add: distributed_borel_measurable)
   373   apply simp
   370   apply (rule absolutely_continuous_AE[OF _ T(2)])
   374   apply (rule absolutely_continuous_AE[OF _ T(2)])
   371   apply simp
   375   apply simp
   372   apply (simp add: distributed_AE)
   376   apply (simp add: distributed_AE)
   373   done
   377   done
   374 
   378 
   407 qed
   411 qed
   408 
   412 
   409 lemma distributed_integrable:
   413 lemma distributed_integrable:
   410   "distributed M N X f \<Longrightarrow> g \<in> borel_measurable N \<Longrightarrow>
   414   "distributed M N X f \<Longrightarrow> g \<in> borel_measurable N \<Longrightarrow>
   411     integrable N (\<lambda>x. f x * g x) \<longleftrightarrow> integrable M (\<lambda>x. g (X x))"
   415     integrable N (\<lambda>x. f x * g x) \<longleftrightarrow> integrable M (\<lambda>x. g (X x))"
   412   by (auto simp: distributed_real_measurable distributed_real_AE distributed_measurable
   416   by (auto simp: distributed_real_AE
   413                     distributed_distr_eq_density[symmetric] integral_density[symmetric] integrable_distr_eq)
   417                     distributed_distr_eq_density[symmetric] integral_density[symmetric] integrable_distr_eq)
   414   
   418   
   415 lemma distributed_transform_integrable:
   419 lemma distributed_transform_integrable:
   416   assumes Px: "distributed M N X Px"
   420   assumes Px: "distributed M N X Px"
   417   assumes "distributed M P Y Py"
   421   assumes "distributed M P Y Py"
   444     and "X = (\<lambda>x. f (Y x))"
   448     and "X = (\<lambda>x. f (Y x))"
   445     and "f \<in> measurable T S"
   449     and "f \<in> measurable T S"
   446   shows "integrable T (\<lambda>x. Py x * log b (Px (f x)))"
   450   shows "integrable T (\<lambda>x. Py x * log b (Px (f x)))"
   447   using assms unfolding finite_entropy_def
   451   using assms unfolding finite_entropy_def
   448   using distributed_transform_integrable[of M T Y Py S X Px f "\<lambda>x. log b (Px x)"]
   452   using distributed_transform_integrable[of M T Y Py S X Px f "\<lambda>x. log b (Px x)"]
   449   by (auto dest!: distributed_real_measurable)
   453   by auto
   450 
   454 
   451 subsection {* Mutual Information *}
   455 subsection {* Mutual Information *}
   452 
   456 
   453 definition (in prob_space)
   457 definition (in prob_space)
   454   "mutual_information b S T X Y =
   458   "mutual_information b S T X Y =
   462     (random_variable S X \<and> random_variable T Y \<and>
   466     (random_variable S X \<and> random_variable T Y \<and>
   463       absolutely_continuous P Q \<and> integrable Q (entropy_density b P Q) \<and>
   467       absolutely_continuous P Q \<and> integrable Q (entropy_density b P Q) \<and>
   464       mutual_information b S T X Y = 0)"
   468       mutual_information b S T X Y = 0)"
   465   unfolding indep_var_distribution_eq
   469   unfolding indep_var_distribution_eq
   466 proof safe
   470 proof safe
   467   assume rv: "random_variable S X" "random_variable T Y"
   471   assume rv[measurable]: "random_variable S X" "random_variable T Y"
   468 
   472 
   469   interpret X: prob_space "distr M S X"
   473   interpret X: prob_space "distr M S X"
   470     by (rule prob_space_distr) fact
   474     by (rule prob_space_distr) fact
   471   interpret Y: prob_space "distr M T Y"
   475   interpret Y: prob_space "distr M T Y"
   472     by (rule prob_space_distr) fact
   476     by (rule prob_space_distr) fact
   473   interpret XY: pair_prob_space "distr M S X" "distr M T Y" by default
   477   interpret XY: pair_prob_space "distr M S X" "distr M T Y" by default
   474   interpret P: information_space P b unfolding P_def by default (rule b_gt_1)
   478   interpret P: information_space P b unfolding P_def by default (rule b_gt_1)
   475 
   479 
   476   interpret Q: prob_space Q unfolding Q_def
   480   interpret Q: prob_space Q unfolding Q_def
   477     by (rule prob_space_distr) (simp add: comp_def measurable_pair_iff rv)
   481     by (rule prob_space_distr) simp
   478 
   482 
   479   { assume "distr M S X \<Otimes>\<^isub>M distr M T Y = distr M (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x))"
   483   { assume "distr M S X \<Otimes>\<^isub>M distr M T Y = distr M (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x))"
   480     then have [simp]: "Q = P"  unfolding Q_def P_def by simp
   484     then have [simp]: "Q = P"  unfolding Q_def P_def by simp
   481 
   485 
   482     show ac: "absolutely_continuous P Q" by (simp add: absolutely_continuous_def)
   486     show ac: "absolutely_continuous P Q" by (simp add: absolutely_continuous_def)
   534     using Fy by (auto simp: finite_entropy_def)
   538     using Fy by (auto simp: finite_entropy_def)
   535   have Pxy: "distributed M (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x)) Pxy"
   539   have Pxy: "distributed M (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x)) Pxy"
   536     using Fxy by (auto simp: finite_entropy_def)
   540     using Fxy by (auto simp: finite_entropy_def)
   537 
   541 
   538   have X: "random_variable S X"
   542   have X: "random_variable S X"
   539     using Px by (auto simp: distributed_def finite_entropy_def)
   543     using Px by auto
   540   have Y: "random_variable T Y"
   544   have Y: "random_variable T Y"
   541     using Py by (auto simp: distributed_def finite_entropy_def)
   545     using Py by auto
   542   interpret S: sigma_finite_measure S by fact
   546   interpret S: sigma_finite_measure S by fact
   543   interpret T: sigma_finite_measure T by fact
   547   interpret T: sigma_finite_measure T by fact
   544   interpret ST: pair_sigma_finite S T ..
   548   interpret ST: pair_sigma_finite S T ..
   545   interpret X: prob_space "distr M S X" using X by (rule prob_space_distr)
   549   interpret X: prob_space "distr M S X" using X by (rule prob_space_distr)
   546   interpret Y: prob_space "distr M T Y" using Y by (rule prob_space_distr)
   550   interpret Y: prob_space "distr M T Y" using Y by (rule prob_space_distr)
   566     unfolding Px(1)[THEN distributed_distr_eq_density] Py(1)[THEN distributed_distr_eq_density] eq
   570     unfolding Px(1)[THEN distributed_distr_eq_density] Py(1)[THEN distributed_distr_eq_density] eq
   567   proof (subst pair_measure_density)
   571   proof (subst pair_measure_density)
   568     show "(\<lambda>x. ereal (Px x)) \<in> borel_measurable S" "(\<lambda>y. ereal (Py y)) \<in> borel_measurable T"
   572     show "(\<lambda>x. ereal (Px x)) \<in> borel_measurable S" "(\<lambda>y. ereal (Py y)) \<in> borel_measurable T"
   569       "AE x in S. 0 \<le> ereal (Px x)" "AE y in T. 0 \<le> ereal (Py y)"
   573       "AE x in S. 0 \<le> ereal (Px x)" "AE y in T. 0 \<le> ereal (Py y)"
   570       using Px Py by (auto simp: distributed_def)
   574       using Px Py by (auto simp: distributed_def)
   571     show "sigma_finite_measure (density S Px)" unfolding Px(1)[THEN distributed_distr_eq_density, symmetric] ..
       
   572     show "sigma_finite_measure (density T Py)" unfolding Py(1)[THEN distributed_distr_eq_density, symmetric] ..
   575     show "sigma_finite_measure (density T Py)" unfolding Py(1)[THEN distributed_distr_eq_density, symmetric] ..
   573   qed (fact | simp)+
   576   qed (fact | simp)+
   574   
   577   
   575   have M: "?M = KL_divergence b (density ?P (\<lambda>x. ereal (Px (fst x) * Py (snd x)))) (density ?P (\<lambda>x. ereal (Pxy x)))"
   578   have M: "?M = KL_divergence b (density ?P (\<lambda>x. ereal (Px (fst x) * Py (snd x)))) (density ?P (\<lambda>x. ereal (Pxy x)))"
   576     unfolding mutual_information_def distr_eq Pxy(1)[THEN distributed_distr_eq_density] ..
   579     unfolding mutual_information_def distr_eq Pxy(1)[THEN distributed_distr_eq_density] ..
   673     unfolding Px(1)[THEN distributed_distr_eq_density] Py(1)[THEN distributed_distr_eq_density] eq
   676     unfolding Px(1)[THEN distributed_distr_eq_density] Py(1)[THEN distributed_distr_eq_density] eq
   674   proof (subst pair_measure_density)
   677   proof (subst pair_measure_density)
   675     show "(\<lambda>x. ereal (Px x)) \<in> borel_measurable S" "(\<lambda>y. ereal (Py y)) \<in> borel_measurable T"
   678     show "(\<lambda>x. ereal (Px x)) \<in> borel_measurable S" "(\<lambda>y. ereal (Py y)) \<in> borel_measurable T"
   676       "AE x in S. 0 \<le> ereal (Px x)" "AE y in T. 0 \<le> ereal (Py y)"
   679       "AE x in S. 0 \<le> ereal (Px x)" "AE y in T. 0 \<le> ereal (Py y)"
   677       using Px Py by (auto simp: distributed_def)
   680       using Px Py by (auto simp: distributed_def)
   678     show "sigma_finite_measure (density S Px)" unfolding Px(1)[THEN distributed_distr_eq_density, symmetric] ..
       
   679     show "sigma_finite_measure (density T Py)" unfolding Py(1)[THEN distributed_distr_eq_density, symmetric] ..
   681     show "sigma_finite_measure (density T Py)" unfolding Py(1)[THEN distributed_distr_eq_density, symmetric] ..
   680   qed (fact | simp)+
   682   qed (fact | simp)+
   681   
   683   
   682   have M: "?M = KL_divergence b (density ?P (\<lambda>x. ereal (Px (fst x) * Py (snd x)))) (density ?P (\<lambda>x. ereal (Pxy x)))"
   684   have M: "?M = KL_divergence b (density ?P (\<lambda>x. ereal (Px (fst x) * Py (snd x)))) (density ?P (\<lambda>x. ereal (Pxy x)))"
   683     unfolding mutual_information_def distr_eq Pxy(1)[THEN distributed_distr_eq_density] ..
   685     unfolding mutual_information_def distr_eq Pxy(1)[THEN distributed_distr_eq_density] ..
  1007   "\<I>(X ; Y | Z) \<equiv> conditional_mutual_information b
  1009   "\<I>(X ; Y | Z) \<equiv> conditional_mutual_information b
  1008     (count_space (X ` space M)) (count_space (Y ` space M)) (count_space (Z ` space M)) X Y Z"
  1010     (count_space (X ` space M)) (count_space (Y ` space M)) (count_space (Z ` space M)) X Y Z"
  1009 
  1011 
  1010 lemma (in information_space)
  1012 lemma (in information_space)
  1011   assumes S: "sigma_finite_measure S" and T: "sigma_finite_measure T" and P: "sigma_finite_measure P"
  1013   assumes S: "sigma_finite_measure S" and T: "sigma_finite_measure T" and P: "sigma_finite_measure P"
  1012   assumes Px: "distributed M S X Px"
  1014   assumes Px[measurable]: "distributed M S X Px"
  1013   assumes Pz: "distributed M P Z Pz"
  1015   assumes Pz[measurable]: "distributed M P Z Pz"
  1014   assumes Pyz: "distributed M (T \<Otimes>\<^isub>M P) (\<lambda>x. (Y x, Z x)) Pyz"
  1016   assumes Pyz[measurable]: "distributed M (T \<Otimes>\<^isub>M P) (\<lambda>x. (Y x, Z x)) Pyz"
  1015   assumes Pxz: "distributed M (S \<Otimes>\<^isub>M P) (\<lambda>x. (X x, Z x)) Pxz"
  1017   assumes Pxz[measurable]: "distributed M (S \<Otimes>\<^isub>M P) (\<lambda>x. (X x, Z x)) Pxz"
  1016   assumes Pxyz: "distributed M (S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P) (\<lambda>x. (X x, Y x, Z x)) Pxyz"
  1018   assumes Pxyz[measurable]: "distributed M (S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P) (\<lambda>x. (X x, Y x, Z x)) Pxyz"
  1017   assumes I1: "integrable (S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P) (\<lambda>(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Px x * Pyz (y, z))))"
  1019   assumes I1: "integrable (S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P) (\<lambda>(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Px x * Pyz (y, z))))"
  1018   assumes I2: "integrable (S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P) (\<lambda>(x, y, z). Pxyz (x, y, z) * log b (Pxz (x, z) / (Px x * Pz z)))"
  1020   assumes I2: "integrable (S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P) (\<lambda>(x, y, z). Pxyz (x, y, z) * log b (Pxz (x, z) / (Px x * Pz z)))"
  1019   shows conditional_mutual_information_generic_eq: "conditional_mutual_information b S T P X Y Z
  1021   shows conditional_mutual_information_generic_eq: "conditional_mutual_information b S T P X Y Z
  1020     = (\<integral>(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Pxz (x, z) * (Pyz (y,z) / Pz z))) \<partial>(S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P))" (is "?eq")
  1022     = (\<integral>(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Pxz (x, z) * (Pyz (y,z) / Pz z))) \<partial>(S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P))" (is "?eq")
  1021     and conditional_mutual_information_generic_nonneg: "0 \<le> conditional_mutual_information b S T P X Y Z" (is "?nonneg")
  1023     and conditional_mutual_information_generic_nonneg: "0 \<le> conditional_mutual_information b S T P X Y Z" (is "?nonneg")
  1031   interpret TPS: pair_sigma_finite "T \<Otimes>\<^isub>M P" S ..
  1033   interpret TPS: pair_sigma_finite "T \<Otimes>\<^isub>M P" S ..
  1032   have TP: "sigma_finite_measure (T \<Otimes>\<^isub>M P)" ..
  1034   have TP: "sigma_finite_measure (T \<Otimes>\<^isub>M P)" ..
  1033   have SP: "sigma_finite_measure (S \<Otimes>\<^isub>M P)" ..
  1035   have SP: "sigma_finite_measure (S \<Otimes>\<^isub>M P)" ..
  1034   have YZ: "random_variable (T \<Otimes>\<^isub>M P) (\<lambda>x. (Y x, Z x))"
  1036   have YZ: "random_variable (T \<Otimes>\<^isub>M P) (\<lambda>x. (Y x, Z x))"
  1035     using Pyz by (simp add: distributed_measurable)
  1037     using Pyz by (simp add: distributed_measurable)
  1036 
       
  1037   have Pxyz_f: "\<And>M f. f \<in> measurable M (S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P) \<Longrightarrow> (\<lambda>x. Pxyz (f x)) \<in> borel_measurable M"
       
  1038     using measurable_comp[OF _ Pxyz[THEN distributed_real_measurable]] by (auto simp: comp_def)
       
  1039 
       
  1040   { fix f g h M
       
  1041     assume f: "f \<in> measurable M S" and g: "g \<in> measurable M P" and h: "h \<in> measurable M (S \<Otimes>\<^isub>M P)"
       
  1042     from measurable_comp[OF h Pxz[THEN distributed_real_measurable]]
       
  1043          measurable_comp[OF f Px[THEN distributed_real_measurable]]
       
  1044          measurable_comp[OF g Pz[THEN distributed_real_measurable]]
       
  1045     have "(\<lambda>x. log b (Pxz (h x) / (Px (f x) * Pz (g x)))) \<in> borel_measurable M"
       
  1046       by (simp add: comp_def b_gt_1) }
       
  1047   note borel_log = this
       
  1048 
       
  1049   have measurable_cut: "(\<lambda>(x, y, z). (x, z)) \<in> measurable (S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P) (S \<Otimes>\<^isub>M P)"
       
  1050     by (auto simp add: split_beta' comp_def intro!: measurable_Pair measurable_snd')
       
  1051   
  1038   
  1052   from Pxz Pxyz have distr_eq: "distr M (S \<Otimes>\<^isub>M P) (\<lambda>x. (X x, Z x)) =
  1039   from Pxz Pxyz have distr_eq: "distr M (S \<Otimes>\<^isub>M P) (\<lambda>x. (X x, Z x)) =
  1053     distr (distr M (S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P) (\<lambda>x. (X x, Y x, Z x))) (S \<Otimes>\<^isub>M P) (\<lambda>(x, y, z). (x, z))"
  1040     distr (distr M (S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P) (\<lambda>x. (X x, Y x, Z x))) (S \<Otimes>\<^isub>M P) (\<lambda>(x, y, z). (x, z))"
  1054     by (subst distr_distr[OF measurable_cut]) (auto dest: distributed_measurable simp: comp_def)
  1041     by (simp add: comp_def distr_distr)
  1055 
  1042 
  1056   have "mutual_information b S P X Z =
  1043   have "mutual_information b S P X Z =
  1057     (\<integral>x. Pxz x * log b (Pxz x / (Px (fst x) * Pz (snd x))) \<partial>(S \<Otimes>\<^isub>M P))"
  1044     (\<integral>x. Pxz x * log b (Pxz x / (Px (fst x) * Pz (snd x))) \<partial>(S \<Otimes>\<^isub>M P))"
  1058     by (rule mutual_information_distr[OF S P Px Pz Pxz])
  1045     by (rule mutual_information_distr[OF S P Px Pz Pxz])
  1059   also have "\<dots> = (\<integral>(x,y,z). Pxyz (x,y,z) * log b (Pxz (x,z) / (Px x * Pz z)) \<partial>(S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P))"
  1046   also have "\<dots> = (\<integral>(x,y,z). Pxyz (x,y,z) * log b (Pxz (x,z) / (Px x * Pz z)) \<partial>(S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P))"
  1060     using b_gt_1 Pxz Px Pz
  1047     using b_gt_1 Pxz Px Pz
  1061     by (subst distributed_transform_integral[OF Pxyz Pxz, where T="\<lambda>(x, y, z). (x, z)"])
  1048     by (subst distributed_transform_integral[OF Pxyz Pxz, where T="\<lambda>(x, y, z). (x, z)"]) (auto simp: split_beta')
  1062        (auto simp: split_beta' intro!: measurable_Pair measurable_snd' measurable_snd'' measurable_fst'' borel_measurable_times
       
  1063              dest!: distributed_real_measurable)
       
  1064   finally have mi_eq:
  1049   finally have mi_eq:
  1065     "mutual_information b S P X Z = (\<integral>(x,y,z). Pxyz (x,y,z) * log b (Pxz (x,z) / (Px x * Pz z)) \<partial>(S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P))" .
  1050     "mutual_information b S P X Z = (\<integral>(x,y,z). Pxyz (x,y,z) * log b (Pxz (x,z) / (Px x * Pz z)) \<partial>(S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P))" .
  1066   
  1051   
  1067   have ae1: "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. Px (fst x) = 0 \<longrightarrow> Pxyz x = 0"
  1052   have ae1: "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. Px (fst x) = 0 \<longrightarrow> Pxyz x = 0"
  1068     by (intro subdensity_real[of fst, OF _ Pxyz Px]) auto
  1053     by (intro subdensity_real[of fst, OF _ Pxyz Px]) auto
  1069   moreover have ae2: "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. Pz (snd (snd x)) = 0 \<longrightarrow> Pxyz x = 0"
  1054   moreover have ae2: "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. Pz (snd (snd x)) = 0 \<longrightarrow> Pxyz x = 0"
  1070     by (intro subdensity_real[of "\<lambda>x. snd (snd x)", OF _ Pxyz Pz]) (auto intro: measurable_snd')
  1055     by (intro subdensity_real[of "\<lambda>x. snd (snd x)", OF _ Pxyz Pz]) auto
  1071   moreover have ae3: "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. Pxz (fst x, snd (snd x)) = 0 \<longrightarrow> Pxyz x = 0"
  1056   moreover have ae3: "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. Pxz (fst x, snd (snd x)) = 0 \<longrightarrow> Pxyz x = 0"
  1072     by (intro subdensity_real[of "\<lambda>x. (fst x, snd (snd x))", OF _ Pxyz Pxz]) (auto intro: measurable_Pair measurable_snd')
  1057     by (intro subdensity_real[of "\<lambda>x. (fst x, snd (snd x))", OF _ Pxyz Pxz]) auto
  1073   moreover have ae4: "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. Pyz (snd x) = 0 \<longrightarrow> Pxyz x = 0"
  1058   moreover have ae4: "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. Pyz (snd x) = 0 \<longrightarrow> Pxyz x = 0"
  1074     by (intro subdensity_real[of snd, OF _ Pxyz Pyz]) (auto intro: measurable_Pair)
  1059     by (intro subdensity_real[of snd, OF _ Pxyz Pyz]) auto
  1075   moreover have ae5: "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. 0 \<le> Px (fst x)"
  1060   moreover have ae5: "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. 0 \<le> Px (fst x)"
  1076     using Px by (intro STP.AE_pair_measure) (auto simp: comp_def intro!: measurable_fst'' dest: distributed_real_AE distributed_real_measurable)
  1061     using Px by (intro STP.AE_pair_measure) (auto simp: comp_def dest: distributed_real_AE)
  1077   moreover have ae6: "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. 0 \<le> Pyz (snd x)"
  1062   moreover have ae6: "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. 0 \<le> Pyz (snd x)"
  1078     using Pyz by (intro STP.AE_pair_measure) (auto simp: comp_def intro!: measurable_snd'' dest: distributed_real_AE distributed_real_measurable)
  1063     using Pyz by (intro STP.AE_pair_measure) (auto simp: comp_def dest: distributed_real_AE)
  1079   moreover have ae7: "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. 0 \<le> Pz (snd (snd x))"
  1064   moreover have ae7: "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. 0 \<le> Pz (snd (snd x))"
  1080     using Pz Pz[THEN distributed_real_measurable] by (auto intro!: measurable_snd'' TP.AE_pair_measure STP.AE_pair_measure AE_I2[of S] dest: distributed_real_AE)
  1065     using Pz Pz[THEN distributed_real_measurable]
       
  1066     by (auto intro!: TP.AE_pair_measure STP.AE_pair_measure AE_I2[of S] dest: distributed_real_AE)
  1081   moreover have ae8: "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. 0 \<le> Pxz (fst x, snd (snd x))"
  1067   moreover have ae8: "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. 0 \<le> Pxz (fst x, snd (snd x))"
  1082     using Pxz[THEN distributed_real_AE, THEN SP.AE_pair]
  1068     using Pxz[THEN distributed_real_AE, THEN SP.AE_pair]
  1083     using measurable_comp[OF measurable_Pair[OF measurable_fst measurable_comp[OF measurable_snd measurable_snd]] Pxz[THEN distributed_real_measurable], of T]
  1069     by (auto intro!: TP.AE_pair_measure STP.AE_pair_measure)
  1084     using measurable_comp[OF measurable_snd measurable_Pair2[OF Pxz[THEN distributed_real_measurable]], of _ T]
       
  1085     by (auto intro!: TP.AE_pair_measure STP.AE_pair_measure simp: comp_def)
       
  1086   moreover note Pxyz[THEN distributed_real_AE]
  1070   moreover note Pxyz[THEN distributed_real_AE]
  1087   ultimately have ae: "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P.
  1071   ultimately have ae: "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P.
  1088     Pxyz x * log b (Pxyz x / (Px (fst x) * Pyz (snd x))) -
  1072     Pxyz x * log b (Pxyz x / (Px (fst x) * Pyz (snd x))) -
  1089     Pxyz x * log b (Pxz (fst x, snd (snd x)) / (Px (fst x) * Pz (snd (snd x)))) =
  1073     Pxyz x * log b (Pxz (fst x, snd (snd x)) / (Px (fst x) * Pz (snd (snd x)))) =
  1090     Pxyz x * log b (Pxyz x * Pz (snd (snd x)) / (Pxz (fst x, snd (snd x)) * Pyz (snd x))) "
  1074     Pxyz x * log b (Pxyz x * Pz (snd (snd x)) / (Pxz (fst x, snd (snd x)) * Pyz (snd x))) "
  1108     done
  1092     done
  1109 
  1093 
  1110   let ?P = "density (S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P) Pxyz"
  1094   let ?P = "density (S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P) Pxyz"
  1111   interpret P: prob_space ?P
  1095   interpret P: prob_space ?P
  1112     unfolding distributed_distr_eq_density[OF Pxyz, symmetric]
  1096     unfolding distributed_distr_eq_density[OF Pxyz, symmetric]
  1113     using distributed_measurable[OF Pxyz] by (rule prob_space_distr)
  1097     by (rule prob_space_distr) simp
  1114 
  1098 
  1115   let ?Q = "density (T \<Otimes>\<^isub>M P) Pyz"
  1099   let ?Q = "density (T \<Otimes>\<^isub>M P) Pyz"
  1116   interpret Q: prob_space ?Q
  1100   interpret Q: prob_space ?Q
  1117     unfolding distributed_distr_eq_density[OF Pyz, symmetric]
  1101     unfolding distributed_distr_eq_density[OF Pyz, symmetric]
  1118     using distributed_measurable[OF Pyz] by (rule prob_space_distr)
  1102     by (rule prob_space_distr) simp
  1119 
  1103 
  1120   let ?f = "\<lambda>(x, y, z). Pxz (x, z) * (Pyz (y, z) / Pz z) / Pxyz (x, y, z)"
  1104   let ?f = "\<lambda>(x, y, z). Pxz (x, z) * (Pyz (y, z) / Pz z) / Pxyz (x, y, z)"
  1121 
  1105 
  1122   from subdensity_real[of snd, OF _ Pyz Pz]
  1106   from subdensity_real[of snd, OF _ Pyz Pz]
  1123   have aeX1: "AE x in T \<Otimes>\<^isub>M P. Pz (snd x) = 0 \<longrightarrow> Pyz x = 0" by (auto simp: comp_def)
  1107   have aeX1: "AE x in T \<Otimes>\<^isub>M P. Pz (snd x) = 0 \<longrightarrow> Pyz x = 0" by (auto simp: comp_def)
  1124   have aeX2: "AE x in T \<Otimes>\<^isub>M P. 0 \<le> Pz (snd x)"
  1108   have aeX2: "AE x in T \<Otimes>\<^isub>M P. 0 \<le> Pz (snd x)"
  1125     using Pz by (intro TP.AE_pair_measure) (auto simp: comp_def intro!: measurable_snd'' dest: distributed_real_AE distributed_real_measurable)
  1109     using Pz by (intro TP.AE_pair_measure) (auto simp: comp_def dest: distributed_real_AE)
  1126 
  1110 
  1127   have aeX3: "AE y in T \<Otimes>\<^isub>M P. (\<integral>\<^isup>+ x. ereal (Pxz (x, snd y)) \<partial>S) = ereal (Pz (snd y))"
  1111   have aeX3: "AE y in T \<Otimes>\<^isub>M P. (\<integral>\<^isup>+ x. ereal (Pxz (x, snd y)) \<partial>S) = ereal (Pz (snd y))"
  1128     using Pz distributed_marginal_eq_joint2[OF P S Pz Pxz]
  1112     using Pz distributed_marginal_eq_joint2[OF P S Pz Pxz]
  1129     apply (intro TP.AE_pair_measure)
  1113     by (intro TP.AE_pair_measure) (auto dest: distributed_real_AE)
  1130     apply (auto simp: comp_def measurable_split_conv
  1114 
  1131                 intro!: measurable_snd'' measurable_fst'' borel_measurable_ereal_eq borel_measurable_ereal
       
  1132                         S.borel_measurable_positive_integral measurable_compose[OF _ Pxz[THEN distributed_real_measurable]]
       
  1133                         measurable_Pair
       
  1134                 dest: distributed_real_AE distributed_real_measurable)
       
  1135     done
       
  1136 
       
  1137   note M = borel_measurable_divide borel_measurable_diff borel_measurable_times borel_measurable_ereal
       
  1138            measurable_compose[OF _ measurable_snd]
       
  1139            measurable_Pair
       
  1140            measurable_compose[OF _ Pxyz[THEN distributed_real_measurable]]
       
  1141            measurable_compose[OF _ Pxz[THEN distributed_real_measurable]]
       
  1142            measurable_compose[OF _ Pyz[THEN distributed_real_measurable]]
       
  1143            measurable_compose[OF _ Pz[THEN distributed_real_measurable]]
       
  1144            measurable_compose[OF _ Px[THEN distributed_real_measurable]]
       
  1145            TP.borel_measurable_positive_integral
       
  1146   have "(\<integral>\<^isup>+ x. ?f x \<partial>?P) \<le> (\<integral>\<^isup>+ (x, y, z). Pxz (x, z) * (Pyz (y, z) / Pz z) \<partial>(S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P))"
  1115   have "(\<integral>\<^isup>+ x. ?f x \<partial>?P) \<le> (\<integral>\<^isup>+ (x, y, z). Pxz (x, z) * (Pyz (y, z) / Pz z) \<partial>(S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P))"
  1147     apply (subst positive_integral_density)
  1116     apply (subst positive_integral_density)
  1148     apply (rule distributed_borel_measurable[OF Pxyz])
  1117     apply simp
  1149     apply (rule distributed_AE[OF Pxyz])
  1118     apply (rule distributed_AE[OF Pxyz])
  1150     apply (auto simp add: borel_measurable_ereal_iff split_beta' intro!: M) []
  1119     apply auto []
  1151     apply (rule positive_integral_mono_AE)
  1120     apply (rule positive_integral_mono_AE)
  1152     using ae5 ae6 ae7 ae8
  1121     using ae5 ae6 ae7 ae8
  1153     apply eventually_elim
  1122     apply eventually_elim
  1154     apply (auto intro!: divide_nonneg_nonneg mult_nonneg_nonneg)
  1123     apply (auto intro!: divide_nonneg_nonneg mult_nonneg_nonneg)
  1155     done
  1124     done
  1156   also have "\<dots> = (\<integral>\<^isup>+(y, z). \<integral>\<^isup>+ x. ereal (Pxz (x, z)) * ereal (Pyz (y, z) / Pz z) \<partial>S \<partial>T \<Otimes>\<^isub>M P)"
  1125   also have "\<dots> = (\<integral>\<^isup>+(y, z). \<integral>\<^isup>+ x. ereal (Pxz (x, z)) * ereal (Pyz (y, z) / Pz z) \<partial>S \<partial>T \<Otimes>\<^isub>M P)"
  1157     by (subst STP.positive_integral_snd_measurable[symmetric])
  1126     by (subst STP.positive_integral_snd_measurable[symmetric]) (auto simp add: split_beta')
  1158        (auto simp add: borel_measurable_ereal_iff split_beta' intro!: M)
       
  1159   also have "\<dots> = (\<integral>\<^isup>+x. ereal (Pyz x) * 1 \<partial>T \<Otimes>\<^isub>M P)"
  1127   also have "\<dots> = (\<integral>\<^isup>+x. ereal (Pyz x) * 1 \<partial>T \<Otimes>\<^isub>M P)"
  1160     apply (rule positive_integral_cong_AE)
  1128     apply (rule positive_integral_cong_AE)
  1161     using aeX1 aeX2 aeX3 distributed_AE[OF Pyz] AE_space
  1129     using aeX1 aeX2 aeX3 distributed_AE[OF Pyz] AE_space
  1162     apply eventually_elim
  1130     apply eventually_elim
  1163   proof (case_tac x, simp del: times_ereal.simps add: space_pair_measure)
  1131   proof (case_tac x, simp del: times_ereal.simps add: space_pair_measure)
  1164     fix a b assume "Pz b = 0 \<longrightarrow> Pyz (a, b) = 0" "0 \<le> Pz b" "a \<in> space T \<and> b \<in> space P"
  1132     fix a b assume "Pz b = 0 \<longrightarrow> Pyz (a, b) = 0" "0 \<le> Pz b" "a \<in> space T \<and> b \<in> space P"
  1165       "(\<integral>\<^isup>+ x. ereal (Pxz (x, b)) \<partial>S) = ereal (Pz b)" "0 \<le> Pyz (a, b)" 
  1133       "(\<integral>\<^isup>+ x. ereal (Pxz (x, b)) \<partial>S) = ereal (Pz b)" "0 \<le> Pyz (a, b)" 
  1166     then show "(\<integral>\<^isup>+ x. ereal (Pxz (x, b)) * ereal (Pyz (a, b) / Pz b) \<partial>S) = ereal (Pyz (a, b))"
  1134     then show "(\<integral>\<^isup>+ x. ereal (Pxz (x, b)) * ereal (Pyz (a, b) / Pz b) \<partial>S) = ereal (Pyz (a, b))"
  1167       apply (subst positive_integral_multc)
  1135       by (subst positive_integral_multc)
  1168       apply (auto intro!: borel_measurable_ereal divide_nonneg_nonneg
  1136          (auto intro!: divide_nonneg_nonneg split: prod.split)
  1169                           measurable_compose[OF _ Pxz[THEN distributed_real_measurable]] measurable_Pair
       
  1170                   split: prod.split)
       
  1171       done
       
  1172   qed
  1137   qed
  1173   also have "\<dots> = 1"
  1138   also have "\<dots> = 1"
  1174     using Q.emeasure_space_1 distributed_AE[OF Pyz] distributed_distr_eq_density[OF Pyz]
  1139     using Q.emeasure_space_1 distributed_AE[OF Pyz] distributed_distr_eq_density[OF Pyz]
  1175     by (subst positive_integral_density[symmetric]) (auto intro!: M)
  1140     by (subst positive_integral_density[symmetric]) auto
  1176   finally have le1: "(\<integral>\<^isup>+ x. ?f x \<partial>?P) \<le> 1" .
  1141   finally have le1: "(\<integral>\<^isup>+ x. ?f x \<partial>?P) \<le> 1" .
  1177   also have "\<dots> < \<infinity>" by simp
  1142   also have "\<dots> < \<infinity>" by simp
  1178   finally have fin: "(\<integral>\<^isup>+ x. ?f x \<partial>?P) \<noteq> \<infinity>" by simp
  1143   finally have fin: "(\<integral>\<^isup>+ x. ?f x \<partial>?P) \<noteq> \<infinity>" by simp
  1179 
  1144 
  1180   have pos: "(\<integral>\<^isup>+ x. ?f x \<partial>?P) \<noteq> 0"
  1145   have pos: "(\<integral>\<^isup>+ x. ?f x \<partial>?P) \<noteq> 0"
  1181     apply (subst positive_integral_density)
  1146     apply (subst positive_integral_density)
  1182     apply (rule distributed_borel_measurable[OF Pxyz])
  1147     apply simp
  1183     apply (rule distributed_AE[OF Pxyz])
  1148     apply (rule distributed_AE[OF Pxyz])
  1184     apply (auto simp add: borel_measurable_ereal_iff split_beta' intro!: M) []
  1149     apply auto []
  1185     apply (simp add: split_beta')
  1150     apply (simp add: split_beta')
  1186   proof
  1151   proof
  1187     let ?g = "\<lambda>x. ereal (if Pxyz x = 0 then 0 else Pxz (fst x, snd (snd x)) * Pyz (snd x) / Pz (snd (snd x)))"
  1152     let ?g = "\<lambda>x. ereal (if Pxyz x = 0 then 0 else Pxz (fst x, snd (snd x)) * Pyz (snd x) / Pz (snd (snd x)))"
  1188     assume "(\<integral>\<^isup>+ x. ?g x \<partial>(S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P)) = 0"
  1153     assume "(\<integral>\<^isup>+ x. ?g x \<partial>(S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P)) = 0"
  1189     then have "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. ?g x \<le> 0"
  1154     then have "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. ?g x \<le> 0"
  1190       by (intro positive_integral_0_iff_AE[THEN iffD1]) (auto intro!: M borel_measurable_ereal measurable_If)
  1155       by (intro positive_integral_0_iff_AE[THEN iffD1]) auto
  1191     then have "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. Pxyz x = 0"
  1156     then have "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. Pxyz x = 0"
  1192       using ae1 ae2 ae3 ae4 ae5 ae6 ae7 ae8 Pxyz[THEN distributed_real_AE]
  1157       using ae1 ae2 ae3 ae4 ae5 ae6 ae7 ae8 Pxyz[THEN distributed_real_AE]
  1193       by eventually_elim (auto split: split_if_asm simp: mult_le_0_iff divide_le_0_iff)
  1158       by eventually_elim (auto split: split_if_asm simp: mult_le_0_iff divide_le_0_iff)
  1194     then have "(\<integral>\<^isup>+ x. ereal (Pxyz x) \<partial>S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P) = 0"
  1159     then have "(\<integral>\<^isup>+ x. ereal (Pxyz x) \<partial>S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P) = 0"
  1195       by (subst positive_integral_cong_AE[of _ "\<lambda>x. 0"]) auto
  1160       by (subst positive_integral_cong_AE[of _ "\<lambda>x. 0"]) auto
  1196     with P.emeasure_space_1 show False
  1161     with P.emeasure_space_1 show False
  1197       by (subst (asm) emeasure_density) (auto intro!: M cong: positive_integral_cong)
  1162       by (subst (asm) emeasure_density) (auto cong: positive_integral_cong)
  1198   qed
  1163   qed
  1199 
  1164 
  1200   have neg: "(\<integral>\<^isup>+ x. - ?f x \<partial>?P) = 0"
  1165   have neg: "(\<integral>\<^isup>+ x. - ?f x \<partial>?P) = 0"
  1201     apply (rule positive_integral_0_iff_AE[THEN iffD2])
  1166     apply (rule positive_integral_0_iff_AE[THEN iffD2])
  1202     apply (auto intro!: M simp: split_beta') []
  1167     apply simp
  1203     apply (subst AE_density)
  1168     apply (subst AE_density)
  1204     apply (auto intro!: M simp: split_beta') []
  1169     apply simp
  1205     using ae5 ae6 ae7 ae8
  1170     using ae5 ae6 ae7 ae8
  1206     apply eventually_elim
  1171     apply eventually_elim
  1207     apply (auto intro!: mult_nonneg_nonneg divide_nonneg_nonneg)
  1172     apply (auto intro!: mult_nonneg_nonneg divide_nonneg_nonneg)
  1208     done
  1173     done
  1209 
  1174 
  1210   have I3: "integrable (S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P) (\<lambda>(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Pxz (x, z) * (Pyz (y,z) / Pz z))))"
  1175   have I3: "integrable (S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P) (\<lambda>(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Pxz (x, z) * (Pyz (y,z) / Pz z))))"
  1211     apply (rule integrable_cong_AE[THEN iffD1, OF _ _ _ integral_diff(1)[OF I1 I2]])
  1176     apply (rule integrable_cong_AE[THEN iffD1, OF _ _ _ integral_diff(1)[OF I1 I2]])
  1212     using ae
  1177     using ae
  1213     apply (auto intro!: M simp: split_beta')
  1178     apply (auto simp: split_beta')
  1214     done
  1179     done
  1215 
  1180 
  1216   have "- log b 1 \<le> - log b (integral\<^isup>L ?P ?f)"
  1181   have "- log b 1 \<le> - log b (integral\<^isup>L ?P ?f)"
  1217   proof (intro le_imp_neg_le log_le[OF b_gt_1])
  1182   proof (intro le_imp_neg_le log_le[OF b_gt_1])
  1218     show "0 < integral\<^isup>L ?P ?f"
  1183     show "0 < integral\<^isup>L ?P ?f"
  1228       unfolding AE_density[OF distributed_borel_measurable[OF Pxyz]]
  1193       unfolding AE_density[OF distributed_borel_measurable[OF Pxyz]]
  1229       using ae1 ae2 ae3 ae4 ae5 ae6 ae7 ae8 Pxyz[THEN distributed_real_AE]
  1194       using ae1 ae2 ae3 ae4 ae5 ae6 ae7 ae8 Pxyz[THEN distributed_real_AE]
  1230       by eventually_elim (auto simp: divide_pos_pos mult_pos_pos)
  1195       by eventually_elim (auto simp: divide_pos_pos mult_pos_pos)
  1231     show "integrable ?P ?f"
  1196     show "integrable ?P ?f"
  1232       unfolding integrable_def 
  1197       unfolding integrable_def 
  1233       using fin neg by (auto intro!: M simp: split_beta')
  1198       using fin neg by (auto simp: split_beta')
  1234     show "integrable ?P (\<lambda>x. - log b (?f x))"
  1199     show "integrable ?P (\<lambda>x. - log b (?f x))"
  1235       apply (subst integral_density)
  1200       apply (subst integral_density)
  1236       apply (auto intro!: M) []
  1201       apply simp
  1237       apply (auto intro!: M distributed_real_AE[OF Pxyz]) []
  1202       apply (auto intro!: distributed_real_AE[OF Pxyz]) []
  1238       apply (auto intro!: M borel_measurable_uminus borel_measurable_log simp: split_beta') []
  1203       apply simp
  1239       apply (rule integrable_cong_AE[THEN iffD1, OF _ _ _ I3])
  1204       apply (rule integrable_cong_AE[THEN iffD1, OF _ _ _ I3])
  1240       apply (auto intro!: M borel_measurable_uminus borel_measurable_log simp: split_beta') []
  1205       apply simp
  1241       apply (auto intro!: M borel_measurable_uminus borel_measurable_log simp: split_beta') []
  1206       apply simp
  1242       using ae1 ae2 ae3 ae4 ae5 ae6 ae7 ae8 Pxyz[THEN distributed_real_AE]
  1207       using ae1 ae2 ae3 ae4 ae5 ae6 ae7 ae8 Pxyz[THEN distributed_real_AE]
  1243       apply eventually_elim
  1208       apply eventually_elim
  1244       apply (auto simp: log_divide_eq log_mult_eq zero_le_mult_iff zero_less_mult_iff zero_less_divide_iff field_simps)
  1209       apply (auto simp: log_divide_eq log_mult_eq zero_le_mult_iff zero_less_mult_iff zero_less_divide_iff field_simps)
  1245       done
  1210       done
  1246   qed (auto simp: b_gt_1 minus_log_convex)
  1211   qed (auto simp: b_gt_1 minus_log_convex)
  1247   also have "\<dots> = conditional_mutual_information b S T P X Y Z"
  1212   also have "\<dots> = conditional_mutual_information b S T P X Y Z"
  1248     unfolding `?eq`
  1213     unfolding `?eq`
  1249     apply (subst integral_density)
  1214     apply (subst integral_density)
  1250     apply (auto intro!: M) []
  1215     apply simp
  1251     apply (auto intro!: M distributed_real_AE[OF Pxyz]) []
  1216     apply (auto intro!: distributed_real_AE[OF Pxyz]) []
  1252     apply (auto intro!: M borel_measurable_uminus borel_measurable_log simp: split_beta') []
  1217     apply simp
  1253     apply (intro integral_cong_AE)
  1218     apply (intro integral_cong_AE)
  1254     using ae1 ae2 ae3 ae4 ae5 ae6 ae7 ae8 Pxyz[THEN distributed_real_AE]
  1219     using ae1 ae2 ae3 ae4 ae5 ae6 ae7 ae8 Pxyz[THEN distributed_real_AE]
  1255     apply eventually_elim
  1220     apply eventually_elim
  1256     apply (auto simp: log_divide_eq zero_less_mult_iff zero_less_divide_iff field_simps)
  1221     apply (auto simp: log_divide_eq zero_less_mult_iff zero_less_divide_iff field_simps)
  1257     done
  1222     done
  1269   assumes Fxyz: "finite_entropy (S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P) (\<lambda>x. (X x, Y x, Z x)) Pxyz"
  1234   assumes Fxyz: "finite_entropy (S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P) (\<lambda>x. (X x, Y x, Z x)) Pxyz"
  1270   shows conditional_mutual_information_generic_eq': "conditional_mutual_information b S T P X Y Z
  1235   shows conditional_mutual_information_generic_eq': "conditional_mutual_information b S T P X Y Z
  1271     = (\<integral>(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Pxz (x, z) * (Pyz (y,z) / Pz z))) \<partial>(S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P))" (is "?eq")
  1236     = (\<integral>(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Pxz (x, z) * (Pyz (y,z) / Pz z))) \<partial>(S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P))" (is "?eq")
  1272     and conditional_mutual_information_generic_nonneg': "0 \<le> conditional_mutual_information b S T P X Y Z" (is "?nonneg")
  1237     and conditional_mutual_information_generic_nonneg': "0 \<le> conditional_mutual_information b S T P X Y Z" (is "?nonneg")
  1273 proof -
  1238 proof -
  1274   note Px = Fx[THEN finite_entropy_distributed]
  1239   note Px = Fx[THEN finite_entropy_distributed, measurable]
  1275   note Pz = Fz[THEN finite_entropy_distributed]
  1240   note Pz = Fz[THEN finite_entropy_distributed, measurable]
  1276   note Pyz = Fyz[THEN finite_entropy_distributed]
  1241   note Pyz = Fyz[THEN finite_entropy_distributed, measurable]
  1277   note Pxz = Fxz[THEN finite_entropy_distributed]
  1242   note Pxz = Fxz[THEN finite_entropy_distributed, measurable]
  1278   note Pxyz = Fxyz[THEN finite_entropy_distributed]
  1243   note Pxyz = Fxyz[THEN finite_entropy_distributed, measurable]
  1279 
  1244 
  1280   interpret S: sigma_finite_measure S by fact
  1245   interpret S: sigma_finite_measure S by fact
  1281   interpret T: sigma_finite_measure T by fact
  1246   interpret T: sigma_finite_measure T by fact
  1282   interpret P: sigma_finite_measure P by fact
  1247   interpret P: sigma_finite_measure P by fact
  1283   interpret TP: pair_sigma_finite T P ..
  1248   interpret TP: pair_sigma_finite T P ..
  1286   interpret SPT: pair_sigma_finite "S \<Otimes>\<^isub>M P" T ..
  1251   interpret SPT: pair_sigma_finite "S \<Otimes>\<^isub>M P" T ..
  1287   interpret STP: pair_sigma_finite S "T \<Otimes>\<^isub>M P" ..
  1252   interpret STP: pair_sigma_finite S "T \<Otimes>\<^isub>M P" ..
  1288   interpret TPS: pair_sigma_finite "T \<Otimes>\<^isub>M P" S ..
  1253   interpret TPS: pair_sigma_finite "T \<Otimes>\<^isub>M P" S ..
  1289   have TP: "sigma_finite_measure (T \<Otimes>\<^isub>M P)" ..
  1254   have TP: "sigma_finite_measure (T \<Otimes>\<^isub>M P)" ..
  1290   have SP: "sigma_finite_measure (S \<Otimes>\<^isub>M P)" ..
  1255   have SP: "sigma_finite_measure (S \<Otimes>\<^isub>M P)" ..
  1291   have YZ: "random_variable (T \<Otimes>\<^isub>M P) (\<lambda>x. (Y x, Z x))"
  1256 
  1292     using Pyz by (simp add: distributed_measurable)
       
  1293 
       
  1294   have Pxyz_f: "\<And>M f. f \<in> measurable M (S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P) \<Longrightarrow> (\<lambda>x. Pxyz (f x)) \<in> borel_measurable M"
       
  1295     using measurable_comp[OF _ Pxyz[THEN distributed_real_measurable]] by (auto simp: comp_def)
       
  1296 
       
  1297   { fix f g h M
       
  1298     assume f: "f \<in> measurable M S" and g: "g \<in> measurable M P" and h: "h \<in> measurable M (S \<Otimes>\<^isub>M P)"
       
  1299     from measurable_comp[OF h Pxz[THEN distributed_real_measurable]]
       
  1300          measurable_comp[OF f Px[THEN distributed_real_measurable]]
       
  1301          measurable_comp[OF g Pz[THEN distributed_real_measurable]]
       
  1302     have "(\<lambda>x. log b (Pxz (h x) / (Px (f x) * Pz (g x)))) \<in> borel_measurable M"
       
  1303       by (simp add: comp_def b_gt_1) }
       
  1304   note borel_log = this
       
  1305 
       
  1306   have measurable_cut: "(\<lambda>(x, y, z). (x, z)) \<in> measurable (S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P) (S \<Otimes>\<^isub>M P)"
       
  1307     by (auto simp add: split_beta' comp_def intro!: measurable_Pair measurable_snd')
       
  1308   
       
  1309   from Pxz Pxyz have distr_eq: "distr M (S \<Otimes>\<^isub>M P) (\<lambda>x. (X x, Z x)) =
  1257   from Pxz Pxyz have distr_eq: "distr M (S \<Otimes>\<^isub>M P) (\<lambda>x. (X x, Z x)) =
  1310     distr (distr M (S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P) (\<lambda>x. (X x, Y x, Z x))) (S \<Otimes>\<^isub>M P) (\<lambda>(x, y, z). (x, z))"
  1258     distr (distr M (S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P) (\<lambda>x. (X x, Y x, Z x))) (S \<Otimes>\<^isub>M P) (\<lambda>(x, y, z). (x, z))"
  1311     by (subst distr_distr[OF measurable_cut]) (auto dest: distributed_measurable simp: comp_def)
  1259     by (simp add: distr_distr comp_def)
  1312 
  1260 
  1313   have "mutual_information b S P X Z =
  1261   have "mutual_information b S P X Z =
  1314     (\<integral>x. Pxz x * log b (Pxz x / (Px (fst x) * Pz (snd x))) \<partial>(S \<Otimes>\<^isub>M P))"
  1262     (\<integral>x. Pxz x * log b (Pxz x / (Px (fst x) * Pz (snd x))) \<partial>(S \<Otimes>\<^isub>M P))"
  1315     by (rule mutual_information_distr[OF S P Px Pz Pxz])
  1263     by (rule mutual_information_distr[OF S P Px Pz Pxz])
  1316   also have "\<dots> = (\<integral>(x,y,z). Pxyz (x,y,z) * log b (Pxz (x,z) / (Px x * Pz z)) \<partial>(S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P))"
  1264   also have "\<dots> = (\<integral>(x,y,z). Pxyz (x,y,z) * log b (Pxz (x,z) / (Px x * Pz z)) \<partial>(S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P))"
  1317     using b_gt_1 Pxz Px Pz
  1265     using b_gt_1 Pxz Px Pz
  1318     by (subst distributed_transform_integral[OF Pxyz Pxz, where T="\<lambda>(x, y, z). (x, z)"])
  1266     by (subst distributed_transform_integral[OF Pxyz Pxz, where T="\<lambda>(x, y, z). (x, z)"])
  1319        (auto simp: split_beta' intro!: measurable_Pair measurable_snd' measurable_snd'' measurable_fst'' borel_measurable_times
  1267        (auto simp: split_beta')
  1320              dest!: distributed_real_measurable)
       
  1321   finally have mi_eq:
  1268   finally have mi_eq:
  1322     "mutual_information b S P X Z = (\<integral>(x,y,z). Pxyz (x,y,z) * log b (Pxz (x,z) / (Px x * Pz z)) \<partial>(S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P))" .
  1269     "mutual_information b S P X Z = (\<integral>(x,y,z). Pxyz (x,y,z) * log b (Pxz (x,z) / (Px x * Pz z)) \<partial>(S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P))" .
  1323   
  1270   
  1324   have ae1: "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. Px (fst x) = 0 \<longrightarrow> Pxyz x = 0"
  1271   have ae1: "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. Px (fst x) = 0 \<longrightarrow> Pxyz x = 0"
  1325     by (intro subdensity_real[of fst, OF _ Pxyz Px]) auto
  1272     by (intro subdensity_real[of fst, OF _ Pxyz Px]) auto
  1326   moreover have ae2: "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. Pz (snd (snd x)) = 0 \<longrightarrow> Pxyz x = 0"
  1273   moreover have ae2: "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. Pz (snd (snd x)) = 0 \<longrightarrow> Pxyz x = 0"
  1327     by (intro subdensity_real[of "\<lambda>x. snd (snd x)", OF _ Pxyz Pz]) (auto intro: measurable_snd')
  1274     by (intro subdensity_real[of "\<lambda>x. snd (snd x)", OF _ Pxyz Pz]) auto
  1328   moreover have ae3: "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. Pxz (fst x, snd (snd x)) = 0 \<longrightarrow> Pxyz x = 0"
  1275   moreover have ae3: "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. Pxz (fst x, snd (snd x)) = 0 \<longrightarrow> Pxyz x = 0"
  1329     by (intro subdensity_real[of "\<lambda>x. (fst x, snd (snd x))", OF _ Pxyz Pxz]) (auto intro: measurable_Pair measurable_snd')
  1276     by (intro subdensity_real[of "\<lambda>x. (fst x, snd (snd x))", OF _ Pxyz Pxz]) auto
  1330   moreover have ae4: "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. Pyz (snd x) = 0 \<longrightarrow> Pxyz x = 0"
  1277   moreover have ae4: "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. Pyz (snd x) = 0 \<longrightarrow> Pxyz x = 0"
  1331     by (intro subdensity_real[of snd, OF _ Pxyz Pyz]) (auto intro: measurable_Pair)
  1278     by (intro subdensity_real[of snd, OF _ Pxyz Pyz]) auto
  1332   moreover have ae5: "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. 0 \<le> Px (fst x)"
  1279   moreover have ae5: "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. 0 \<le> Px (fst x)"
  1333     using Px by (intro STP.AE_pair_measure) (auto simp: comp_def intro!: measurable_fst'' dest: distributed_real_AE distributed_real_measurable)
  1280     using Px by (intro STP.AE_pair_measure) (auto dest: distributed_real_AE)
  1334   moreover have ae6: "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. 0 \<le> Pyz (snd x)"
  1281   moreover have ae6: "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. 0 \<le> Pyz (snd x)"
  1335     using Pyz by (intro STP.AE_pair_measure) (auto simp: comp_def intro!: measurable_snd'' dest: distributed_real_AE distributed_real_measurable)
  1282     using Pyz by (intro STP.AE_pair_measure) (auto dest: distributed_real_AE)
  1336   moreover have ae7: "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. 0 \<le> Pz (snd (snd x))"
  1283   moreover have ae7: "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. 0 \<le> Pz (snd (snd x))"
  1337     using Pz Pz[THEN distributed_real_measurable] by (auto intro!: measurable_snd'' TP.AE_pair_measure STP.AE_pair_measure AE_I2[of S] dest: distributed_real_AE)
  1284     using Pz Pz[THEN distributed_real_measurable] by (auto intro!: TP.AE_pair_measure STP.AE_pair_measure AE_I2[of S] dest: distributed_real_AE)
  1338   moreover have ae8: "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. 0 \<le> Pxz (fst x, snd (snd x))"
  1285   moreover have ae8: "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. 0 \<le> Pxz (fst x, snd (snd x))"
  1339     using Pxz[THEN distributed_real_AE, THEN SP.AE_pair]
  1286     using Pxz[THEN distributed_real_AE, THEN SP.AE_pair]
  1340     using measurable_comp[OF measurable_Pair[OF measurable_fst measurable_comp[OF measurable_snd measurable_snd]] Pxz[THEN distributed_real_measurable], of T]
       
  1341     using measurable_comp[OF measurable_snd measurable_Pair2[OF Pxz[THEN distributed_real_measurable]], of _ T]
       
  1342     by (auto intro!: TP.AE_pair_measure STP.AE_pair_measure simp: comp_def)
  1287     by (auto intro!: TP.AE_pair_measure STP.AE_pair_measure simp: comp_def)
  1343   moreover note ae9 = Pxyz[THEN distributed_real_AE]
  1288   moreover note ae9 = Pxyz[THEN distributed_real_AE]
  1344   ultimately have ae: "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P.
  1289   ultimately have ae: "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P.
  1345     Pxyz x * log b (Pxyz x / (Px (fst x) * Pyz (snd x))) -
  1290     Pxyz x * log b (Pxyz x / (Px (fst x) * Pyz (snd x))) -
  1346     Pxyz x * log b (Pxz (fst x, snd (snd x)) / (Px (fst x) * Pz (snd (snd x)))) =
  1291     Pxyz x * log b (Pxz (fst x, snd (snd x)) / (Px (fst x) * Pz (snd (snd x)))) =
  1362     using finite_entropy_integrable[OF Fxyz]
  1307     using finite_entropy_integrable[OF Fxyz]
  1363     using finite_entropy_integrable_transform[OF Fx Pxyz, of fst]
  1308     using finite_entropy_integrable_transform[OF Fx Pxyz, of fst]
  1364     using finite_entropy_integrable_transform[OF Fyz Pxyz, of snd]
  1309     using finite_entropy_integrable_transform[OF Fyz Pxyz, of snd]
  1365     by simp
  1310     by simp
  1366   moreover have "(\<lambda>(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Px x * Pyz (y, z)))) \<in> borel_measurable (S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P)"
  1311   moreover have "(\<lambda>(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Px x * Pyz (y, z)))) \<in> borel_measurable (S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P)"
  1367     using Pxyz Px Pyz
  1312     using Pxyz Px Pyz by simp
  1368     by (auto intro!: borel_measurable_times measurable_fst'' measurable_snd'' dest!: distributed_real_measurable simp: split_beta')
       
  1369   ultimately have I1: "integrable (S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P) (\<lambda>(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Px x * Pyz (y, z))))"
  1313   ultimately have I1: "integrable (S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P) (\<lambda>(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Px x * Pyz (y, z))))"
  1370     apply (rule integrable_cong_AE_imp)
  1314     apply (rule integrable_cong_AE_imp)
  1371     using ae1 ae4 ae5 ae6 ae9
  1315     using ae1 ae4 ae5 ae6 ae9
  1372     by eventually_elim
  1316     by eventually_elim
  1373        (auto simp: log_divide_eq log_mult_eq mult_nonneg_nonneg field_simps zero_less_mult_iff)
  1317        (auto simp: log_divide_eq log_mult_eq mult_nonneg_nonneg field_simps zero_less_mult_iff)
  1375   have "integrable (S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P)
  1319   have "integrable (S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P)
  1376     (\<lambda>x. Pxyz x * log b (Pxz (fst x, snd (snd x))) - Pxyz x * log b (Px (fst x)) - Pxyz x * log b (Pz (snd (snd x))))"
  1320     (\<lambda>x. Pxyz x * log b (Pxz (fst x, snd (snd x))) - Pxyz x * log b (Px (fst x)) - Pxyz x * log b (Pz (snd (snd x))))"
  1377     using finite_entropy_integrable_transform[OF Fxz Pxyz, of "\<lambda>x. (fst x, snd (snd x))"]
  1321     using finite_entropy_integrable_transform[OF Fxz Pxyz, of "\<lambda>x. (fst x, snd (snd x))"]
  1378     using finite_entropy_integrable_transform[OF Fx Pxyz, of fst]
  1322     using finite_entropy_integrable_transform[OF Fx Pxyz, of fst]
  1379     using finite_entropy_integrable_transform[OF Fz Pxyz, of "snd \<circ> snd"]
  1323     using finite_entropy_integrable_transform[OF Fz Pxyz, of "snd \<circ> snd"]
  1380     by (simp add: measurable_Pair measurable_snd'' comp_def)
  1324     by simp
  1381   moreover have "(\<lambda>(x, y, z). Pxyz (x, y, z) * log b (Pxz (x, z) / (Px x * Pz z))) \<in> borel_measurable (S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P)"
  1325   moreover have "(\<lambda>(x, y, z). Pxyz (x, y, z) * log b (Pxz (x, z) / (Px x * Pz z))) \<in> borel_measurable (S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P)"
  1382     using Pxyz Px Pz
  1326     using Pxyz Px Pz
  1383     by (auto intro!: measurable_compose[OF _ distributed_real_measurable[OF Pxz]]
  1327     by auto
  1384                      measurable_Pair borel_measurable_times measurable_fst'' measurable_snd''
       
  1385              dest!: distributed_real_measurable simp: split_beta')
       
  1386   ultimately have I2: "integrable (S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P) (\<lambda>(x, y, z). Pxyz (x, y, z) * log b (Pxz (x, z) / (Px x * Pz z)))"
  1328   ultimately have I2: "integrable (S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P) (\<lambda>(x, y, z). Pxyz (x, y, z) * log b (Pxz (x, z) / (Px x * Pz z)))"
  1387     apply (rule integrable_cong_AE_imp)
  1329     apply (rule integrable_cong_AE_imp)
  1388     using ae1 ae2 ae3 ae4 ae5 ae6 ae7 ae8 ae9
  1330     using ae1 ae2 ae3 ae4 ae5 ae6 ae7 ae8 ae9
  1389     by eventually_elim
  1331     by eventually_elim
  1390        (auto simp: log_divide_eq log_mult_eq mult_nonneg_nonneg field_simps zero_less_mult_iff)
  1332        (auto simp: log_divide_eq log_mult_eq mult_nonneg_nonneg field_simps zero_less_mult_iff)
  1397     apply (auto intro!: integral_cong_AE simp: split_beta' simp del: integral_diff)
  1339     apply (auto intro!: integral_cong_AE simp: split_beta' simp del: integral_diff)
  1398     done
  1340     done
  1399 
  1341 
  1400   let ?P = "density (S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P) Pxyz"
  1342   let ?P = "density (S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P) Pxyz"
  1401   interpret P: prob_space ?P
  1343   interpret P: prob_space ?P
  1402     unfolding distributed_distr_eq_density[OF Pxyz, symmetric]
  1344     unfolding distributed_distr_eq_density[OF Pxyz, symmetric] by (rule prob_space_distr) simp
  1403     using distributed_measurable[OF Pxyz] by (rule prob_space_distr)
       
  1404 
  1345 
  1405   let ?Q = "density (T \<Otimes>\<^isub>M P) Pyz"
  1346   let ?Q = "density (T \<Otimes>\<^isub>M P) Pyz"
  1406   interpret Q: prob_space ?Q
  1347   interpret Q: prob_space ?Q
  1407     unfolding distributed_distr_eq_density[OF Pyz, symmetric]
  1348     unfolding distributed_distr_eq_density[OF Pyz, symmetric] by (rule prob_space_distr) simp
  1408     using distributed_measurable[OF Pyz] by (rule prob_space_distr)
       
  1409 
  1349 
  1410   let ?f = "\<lambda>(x, y, z). Pxz (x, z) * (Pyz (y, z) / Pz z) / Pxyz (x, y, z)"
  1350   let ?f = "\<lambda>(x, y, z). Pxz (x, z) * (Pyz (y, z) / Pz z) / Pxyz (x, y, z)"
  1411 
  1351 
  1412   from subdensity_real[of snd, OF _ Pyz Pz]
  1352   from subdensity_real[of snd, OF _ Pyz Pz]
  1413   have aeX1: "AE x in T \<Otimes>\<^isub>M P. Pz (snd x) = 0 \<longrightarrow> Pyz x = 0" by (auto simp: comp_def)
  1353   have aeX1: "AE x in T \<Otimes>\<^isub>M P. Pz (snd x) = 0 \<longrightarrow> Pyz x = 0" by (auto simp: comp_def)
  1414   have aeX2: "AE x in T \<Otimes>\<^isub>M P. 0 \<le> Pz (snd x)"
  1354   have aeX2: "AE x in T \<Otimes>\<^isub>M P. 0 \<le> Pz (snd x)"
  1415     using Pz by (intro TP.AE_pair_measure) (auto simp: comp_def intro!: measurable_snd'' dest: distributed_real_AE distributed_real_measurable)
  1355     using Pz by (intro TP.AE_pair_measure) (auto dest: distributed_real_AE)
  1416 
  1356 
  1417   have aeX3: "AE y in T \<Otimes>\<^isub>M P. (\<integral>\<^isup>+ x. ereal (Pxz (x, snd y)) \<partial>S) = ereal (Pz (snd y))"
  1357   have aeX3: "AE y in T \<Otimes>\<^isub>M P. (\<integral>\<^isup>+ x. ereal (Pxz (x, snd y)) \<partial>S) = ereal (Pz (snd y))"
  1418     using Pz distributed_marginal_eq_joint2[OF P S Pz Pxz]
  1358     using Pz distributed_marginal_eq_joint2[OF P S Pz Pxz]
  1419     apply (intro TP.AE_pair_measure)
  1359     by (intro TP.AE_pair_measure) (auto dest: distributed_real_AE)
  1420     apply (auto simp: comp_def measurable_split_conv
       
  1421                 intro!: measurable_snd'' measurable_fst'' borel_measurable_ereal_eq borel_measurable_ereal
       
  1422                         S.borel_measurable_positive_integral measurable_compose[OF _ Pxz[THEN distributed_real_measurable]]
       
  1423                         measurable_Pair
       
  1424                 dest: distributed_real_AE distributed_real_measurable)
       
  1425     done
       
  1426 
       
  1427   note M = borel_measurable_divide borel_measurable_diff borel_measurable_times borel_measurable_ereal
       
  1428            measurable_compose[OF _ measurable_snd]
       
  1429            measurable_Pair
       
  1430            measurable_compose[OF _ Pxyz[THEN distributed_real_measurable]]
       
  1431            measurable_compose[OF _ Pxz[THEN distributed_real_measurable]]
       
  1432            measurable_compose[OF _ Pyz[THEN distributed_real_measurable]]
       
  1433            measurable_compose[OF _ Pz[THEN distributed_real_measurable]]
       
  1434            measurable_compose[OF _ Px[THEN distributed_real_measurable]]
       
  1435            TP.borel_measurable_positive_integral
       
  1436   have "(\<integral>\<^isup>+ x. ?f x \<partial>?P) \<le> (\<integral>\<^isup>+ (x, y, z). Pxz (x, z) * (Pyz (y, z) / Pz z) \<partial>(S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P))"
  1360   have "(\<integral>\<^isup>+ x. ?f x \<partial>?P) \<le> (\<integral>\<^isup>+ (x, y, z). Pxz (x, z) * (Pyz (y, z) / Pz z) \<partial>(S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P))"
  1437     apply (subst positive_integral_density)
  1361     apply (subst positive_integral_density)
  1438     apply (rule distributed_borel_measurable[OF Pxyz])
  1362     apply (rule distributed_borel_measurable[OF Pxyz])
  1439     apply (rule distributed_AE[OF Pxyz])
  1363     apply (rule distributed_AE[OF Pxyz])
  1440     apply (auto simp add: borel_measurable_ereal_iff split_beta' intro!: M) []
  1364     apply simp
  1441     apply (rule positive_integral_mono_AE)
  1365     apply (rule positive_integral_mono_AE)
  1442     using ae5 ae6 ae7 ae8
  1366     using ae5 ae6 ae7 ae8
  1443     apply eventually_elim
  1367     apply eventually_elim
  1444     apply (auto intro!: divide_nonneg_nonneg mult_nonneg_nonneg)
  1368     apply (auto intro!: divide_nonneg_nonneg mult_nonneg_nonneg)
  1445     done
  1369     done
  1446   also have "\<dots> = (\<integral>\<^isup>+(y, z). \<integral>\<^isup>+ x. ereal (Pxz (x, z)) * ereal (Pyz (y, z) / Pz z) \<partial>S \<partial>T \<Otimes>\<^isub>M P)"
  1370   also have "\<dots> = (\<integral>\<^isup>+(y, z). \<integral>\<^isup>+ x. ereal (Pxz (x, z)) * ereal (Pyz (y, z) / Pz z) \<partial>S \<partial>T \<Otimes>\<^isub>M P)"
  1447     by (subst STP.positive_integral_snd_measurable[symmetric])
  1371     by (subst STP.positive_integral_snd_measurable[symmetric])
  1448        (auto simp add: borel_measurable_ereal_iff split_beta' intro!: M)
  1372        (auto simp add: split_beta')
  1449   also have "\<dots> = (\<integral>\<^isup>+x. ereal (Pyz x) * 1 \<partial>T \<Otimes>\<^isub>M P)"
  1373   also have "\<dots> = (\<integral>\<^isup>+x. ereal (Pyz x) * 1 \<partial>T \<Otimes>\<^isub>M P)"
  1450     apply (rule positive_integral_cong_AE)
  1374     apply (rule positive_integral_cong_AE)
  1451     using aeX1 aeX2 aeX3 distributed_AE[OF Pyz] AE_space
  1375     using aeX1 aeX2 aeX3 distributed_AE[OF Pyz] AE_space
  1452     apply eventually_elim
  1376     apply eventually_elim
  1453   proof (case_tac x, simp del: times_ereal.simps add: space_pair_measure)
  1377   proof (case_tac x, simp del: times_ereal.simps add: space_pair_measure)
  1454     fix a b assume "Pz b = 0 \<longrightarrow> Pyz (a, b) = 0" "0 \<le> Pz b" "a \<in> space T \<and> b \<in> space P"
  1378     fix a b assume "Pz b = 0 \<longrightarrow> Pyz (a, b) = 0" "0 \<le> Pz b" "a \<in> space T \<and> b \<in> space P"
  1455       "(\<integral>\<^isup>+ x. ereal (Pxz (x, b)) \<partial>S) = ereal (Pz b)" "0 \<le> Pyz (a, b)" 
  1379       "(\<integral>\<^isup>+ x. ereal (Pxz (x, b)) \<partial>S) = ereal (Pz b)" "0 \<le> Pyz (a, b)" 
  1456     then show "(\<integral>\<^isup>+ x. ereal (Pxz (x, b)) * ereal (Pyz (a, b) / Pz b) \<partial>S) = ereal (Pyz (a, b))"
  1380     then show "(\<integral>\<^isup>+ x. ereal (Pxz (x, b)) * ereal (Pyz (a, b) / Pz b) \<partial>S) = ereal (Pyz (a, b))"
  1457       apply (subst positive_integral_multc)
  1381       by (subst positive_integral_multc) (auto intro!: divide_nonneg_nonneg)
  1458       apply (auto intro!: borel_measurable_ereal divide_nonneg_nonneg
       
  1459                           measurable_compose[OF _ Pxz[THEN distributed_real_measurable]] measurable_Pair
       
  1460                   split: prod.split)
       
  1461       done
       
  1462   qed
  1382   qed
  1463   also have "\<dots> = 1"
  1383   also have "\<dots> = 1"
  1464     using Q.emeasure_space_1 distributed_AE[OF Pyz] distributed_distr_eq_density[OF Pyz]
  1384     using Q.emeasure_space_1 distributed_AE[OF Pyz] distributed_distr_eq_density[OF Pyz]
  1465     by (subst positive_integral_density[symmetric]) (auto intro!: M)
  1385     by (subst positive_integral_density[symmetric]) auto
  1466   finally have le1: "(\<integral>\<^isup>+ x. ?f x \<partial>?P) \<le> 1" .
  1386   finally have le1: "(\<integral>\<^isup>+ x. ?f x \<partial>?P) \<le> 1" .
  1467   also have "\<dots> < \<infinity>" by simp
  1387   also have "\<dots> < \<infinity>" by simp
  1468   finally have fin: "(\<integral>\<^isup>+ x. ?f x \<partial>?P) \<noteq> \<infinity>" by simp
  1388   finally have fin: "(\<integral>\<^isup>+ x. ?f x \<partial>?P) \<noteq> \<infinity>" by simp
  1469 
  1389 
  1470   have pos: "(\<integral>\<^isup>+ x. ?f x \<partial>?P) \<noteq> 0"
  1390   have pos: "(\<integral>\<^isup>+ x. ?f x \<partial>?P) \<noteq> 0"
  1471     apply (subst positive_integral_density)
  1391     apply (subst positive_integral_density)
  1472     apply (rule distributed_borel_measurable[OF Pxyz])
  1392     apply simp
  1473     apply (rule distributed_AE[OF Pxyz])
  1393     apply (rule distributed_AE[OF Pxyz])
  1474     apply (auto simp add: borel_measurable_ereal_iff split_beta' intro!: M) []
  1394     apply simp
  1475     apply (simp add: split_beta')
  1395     apply (simp add: split_beta')
  1476   proof
  1396   proof
  1477     let ?g = "\<lambda>x. ereal (if Pxyz x = 0 then 0 else Pxz (fst x, snd (snd x)) * Pyz (snd x) / Pz (snd (snd x)))"
  1397     let ?g = "\<lambda>x. ereal (if Pxyz x = 0 then 0 else Pxz (fst x, snd (snd x)) * Pyz (snd x) / Pz (snd (snd x)))"
  1478     assume "(\<integral>\<^isup>+ x. ?g x \<partial>(S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P)) = 0"
  1398     assume "(\<integral>\<^isup>+ x. ?g x \<partial>(S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P)) = 0"
  1479     then have "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. ?g x \<le> 0"
  1399     then have "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. ?g x \<le> 0"
  1480       by (intro positive_integral_0_iff_AE[THEN iffD1]) (auto intro!: M borel_measurable_ereal measurable_If)
  1400       by (intro positive_integral_0_iff_AE[THEN iffD1]) (auto intro!: borel_measurable_ereal measurable_If)
  1481     then have "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. Pxyz x = 0"
  1401     then have "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. Pxyz x = 0"
  1482       using ae1 ae2 ae3 ae4 ae5 ae6 ae7 ae8 Pxyz[THEN distributed_real_AE]
  1402       using ae1 ae2 ae3 ae4 ae5 ae6 ae7 ae8 Pxyz[THEN distributed_real_AE]
  1483       by eventually_elim (auto split: split_if_asm simp: mult_le_0_iff divide_le_0_iff)
  1403       by eventually_elim (auto split: split_if_asm simp: mult_le_0_iff divide_le_0_iff)
  1484     then have "(\<integral>\<^isup>+ x. ereal (Pxyz x) \<partial>S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P) = 0"
  1404     then have "(\<integral>\<^isup>+ x. ereal (Pxyz x) \<partial>S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P) = 0"
  1485       by (subst positive_integral_cong_AE[of _ "\<lambda>x. 0"]) auto
  1405       by (subst positive_integral_cong_AE[of _ "\<lambda>x. 0"]) auto
  1486     with P.emeasure_space_1 show False
  1406     with P.emeasure_space_1 show False
  1487       by (subst (asm) emeasure_density) (auto intro!: M cong: positive_integral_cong)
  1407       by (subst (asm) emeasure_density) (auto cong: positive_integral_cong)
  1488   qed
  1408   qed
  1489 
  1409 
  1490   have neg: "(\<integral>\<^isup>+ x. - ?f x \<partial>?P) = 0"
  1410   have neg: "(\<integral>\<^isup>+ x. - ?f x \<partial>?P) = 0"
  1491     apply (rule positive_integral_0_iff_AE[THEN iffD2])
  1411     apply (rule positive_integral_0_iff_AE[THEN iffD2])
  1492     apply (auto intro!: M simp: split_beta') []
  1412     apply (auto simp: split_beta') []
  1493     apply (subst AE_density)
  1413     apply (subst AE_density)
  1494     apply (auto intro!: M simp: split_beta') []
  1414     apply (auto simp: split_beta') []
  1495     using ae5 ae6 ae7 ae8
  1415     using ae5 ae6 ae7 ae8
  1496     apply eventually_elim
  1416     apply eventually_elim
  1497     apply (auto intro!: mult_nonneg_nonneg divide_nonneg_nonneg)
  1417     apply (auto intro!: mult_nonneg_nonneg divide_nonneg_nonneg)
  1498     done
  1418     done
  1499 
  1419 
  1500   have I3: "integrable (S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P) (\<lambda>(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Pxz (x, z) * (Pyz (y,z) / Pz z))))"
  1420   have I3: "integrable (S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P) (\<lambda>(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Pxz (x, z) * (Pyz (y,z) / Pz z))))"
  1501     apply (rule integrable_cong_AE[THEN iffD1, OF _ _ _ integral_diff(1)[OF I1 I2]])
  1421     apply (rule integrable_cong_AE[THEN iffD1, OF _ _ _ integral_diff(1)[OF I1 I2]])
  1502     using ae
  1422     using ae
  1503     apply (auto intro!: M simp: split_beta')
  1423     apply (auto simp: split_beta')
  1504     done
  1424     done
  1505 
  1425 
  1506   have "- log b 1 \<le> - log b (integral\<^isup>L ?P ?f)"
  1426   have "- log b 1 \<le> - log b (integral\<^isup>L ?P ?f)"
  1507   proof (intro le_imp_neg_le log_le[OF b_gt_1])
  1427   proof (intro le_imp_neg_le log_le[OF b_gt_1])
  1508     show "0 < integral\<^isup>L ?P ?f"
  1428     show "0 < integral\<^isup>L ?P ?f"
  1518       unfolding AE_density[OF distributed_borel_measurable[OF Pxyz]]
  1438       unfolding AE_density[OF distributed_borel_measurable[OF Pxyz]]
  1519       using ae1 ae2 ae3 ae4 ae5 ae6 ae7 ae8 Pxyz[THEN distributed_real_AE]
  1439       using ae1 ae2 ae3 ae4 ae5 ae6 ae7 ae8 Pxyz[THEN distributed_real_AE]
  1520       by eventually_elim (auto simp: divide_pos_pos mult_pos_pos)
  1440       by eventually_elim (auto simp: divide_pos_pos mult_pos_pos)
  1521     show "integrable ?P ?f"
  1441     show "integrable ?P ?f"
  1522       unfolding integrable_def 
  1442       unfolding integrable_def 
  1523       using fin neg by (auto intro!: M simp: split_beta')
  1443       using fin neg by (auto simp: split_beta')
  1524     show "integrable ?P (\<lambda>x. - log b (?f x))"
  1444     show "integrable ?P (\<lambda>x. - log b (?f x))"
  1525       apply (subst integral_density)
  1445       apply (subst integral_density)
  1526       apply (auto intro!: M) []
  1446       apply simp
  1527       apply (auto intro!: M distributed_real_AE[OF Pxyz]) []
  1447       apply (auto intro!: distributed_real_AE[OF Pxyz]) []
  1528       apply (auto intro!: M borel_measurable_uminus borel_measurable_log simp: split_beta') []
  1448       apply simp
  1529       apply (rule integrable_cong_AE[THEN iffD1, OF _ _ _ I3])
  1449       apply (rule integrable_cong_AE[THEN iffD1, OF _ _ _ I3])
  1530       apply (auto intro!: M borel_measurable_uminus borel_measurable_log simp: split_beta') []
  1450       apply simp
  1531       apply (auto intro!: M borel_measurable_uminus borel_measurable_log simp: split_beta') []
  1451       apply simp
  1532       using ae1 ae2 ae3 ae4 ae5 ae6 ae7 ae8 Pxyz[THEN distributed_real_AE]
  1452       using ae1 ae2 ae3 ae4 ae5 ae6 ae7 ae8 Pxyz[THEN distributed_real_AE]
  1533       apply eventually_elim
  1453       apply eventually_elim
  1534       apply (auto simp: log_divide_eq log_mult_eq zero_le_mult_iff zero_less_mult_iff zero_less_divide_iff field_simps)
  1454       apply (auto simp: log_divide_eq log_mult_eq zero_le_mult_iff zero_less_mult_iff zero_less_divide_iff field_simps)
  1535       done
  1455       done
  1536   qed (auto simp: b_gt_1 minus_log_convex)
  1456   qed (auto simp: b_gt_1 minus_log_convex)
  1537   also have "\<dots> = conditional_mutual_information b S T P X Y Z"
  1457   also have "\<dots> = conditional_mutual_information b S T P X Y Z"
  1538     unfolding `?eq`
  1458     unfolding `?eq`
  1539     apply (subst integral_density)
  1459     apply (subst integral_density)
  1540     apply (auto intro!: M) []
  1460     apply simp
  1541     apply (auto intro!: M distributed_real_AE[OF Pxyz]) []
  1461     apply (auto intro!: distributed_real_AE[OF Pxyz]) []
  1542     apply (auto intro!: M borel_measurable_uminus borel_measurable_log simp: split_beta') []
  1462     apply simp
  1543     apply (intro integral_cong_AE)
  1463     apply (intro integral_cong_AE)
  1544     using ae1 ae2 ae3 ae4 ae5 ae6 ae7 ae8 Pxyz[THEN distributed_real_AE]
  1464     using ae1 ae2 ae3 ae4 ae5 ae6 ae7 ae8 Pxyz[THEN distributed_real_AE]
  1545     apply eventually_elim
  1465     apply eventually_elim
  1546     apply (auto simp: log_divide_eq zero_less_mult_iff zero_less_divide_iff field_simps)
  1466     apply (auto simp: log_divide_eq zero_less_mult_iff zero_less_divide_iff field_simps)
  1547     done
  1467     done
  1631   "\<H>(X | Y) \<equiv> conditional_entropy b (count_space (X`space M)) (count_space (Y`space M)) X Y"
  1551   "\<H>(X | Y) \<equiv> conditional_entropy b (count_space (X`space M)) (count_space (Y`space M)) X Y"
  1632 
  1552 
  1633 lemma (in information_space) conditional_entropy_generic_eq:
  1553 lemma (in information_space) conditional_entropy_generic_eq:
  1634   fixes Px :: "'b \<Rightarrow> real" and Py :: "'c \<Rightarrow> real"
  1554   fixes Px :: "'b \<Rightarrow> real" and Py :: "'c \<Rightarrow> real"
  1635   assumes S: "sigma_finite_measure S" and T: "sigma_finite_measure T"
  1555   assumes S: "sigma_finite_measure S" and T: "sigma_finite_measure T"
  1636   assumes Py: "distributed M T Y Py"
  1556   assumes Py[measurable]: "distributed M T Y Py"
  1637   assumes Pxy: "distributed M (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x)) Pxy"
  1557   assumes Pxy[measurable]: "distributed M (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x)) Pxy"
  1638   shows "conditional_entropy b S T X Y = - (\<integral>(x, y). Pxy (x, y) * log b (Pxy (x, y) / Py y) \<partial>(S \<Otimes>\<^isub>M T))"
  1558   shows "conditional_entropy b S T X Y = - (\<integral>(x, y). Pxy (x, y) * log b (Pxy (x, y) / Py y) \<partial>(S \<Otimes>\<^isub>M T))"
  1639 proof -
  1559 proof -
  1640   interpret S: sigma_finite_measure S by fact
  1560   interpret S: sigma_finite_measure S by fact
  1641   interpret T: sigma_finite_measure T by fact
  1561   interpret T: sigma_finite_measure T by fact
  1642   interpret ST: pair_sigma_finite S T ..
  1562   interpret ST: pair_sigma_finite S T ..