src/HOL/Probability/Information.thy
changeset 49788 3c10763f5cb4
parent 49787 d8de705b48d4
child 49790 6b9b9ebba47d
equal deleted inserted replaced
49787:d8de705b48d4 49788:3c10763f5cb4
   564 
   564 
   565 abbreviation (in information_space)
   565 abbreviation (in information_space)
   566   entropy_Pow ("\<H>'(_')") where
   566   entropy_Pow ("\<H>'(_')") where
   567   "\<H>(X) \<equiv> entropy b (count_space (X`space M)) X"
   567   "\<H>(X) \<equiv> entropy b (count_space (X`space M)) X"
   568 
   568 
   569 lemma (in information_space) entropy_distr:
   569 lemma (in information_space)
   570   fixes X :: "'a \<Rightarrow> 'b"
   570   fixes X :: "'a \<Rightarrow> 'b"
   571   assumes X: "distributed M MX X f"
   571   assumes X: "distributed M MX X f"
   572   shows "entropy b MX X = - (\<integral>x. f x * log b (f x) \<partial>MX)"
   572   shows entropy_distr: "entropy b MX X = - (\<integral>x. f x * log b (f x) \<partial>MX)" (is ?eq)
   573   unfolding entropy_def KL_divergence_def entropy_density_def comp_def
   573 proof -
   574 proof (subst integral_cong_AE)
       
   575   note D = distributed_measurable[OF X] distributed_borel_measurable[OF X] distributed_AE[OF X]
   574   note D = distributed_measurable[OF X] distributed_borel_measurable[OF X] distributed_AE[OF X]
   576   interpret X: prob_space "distr M MX X"
   575     interpret X: prob_space "distr M MX X"
   577     using D(1) by (rule prob_space_distr)
   576     using D(1) by (rule prob_space_distr)
   578 
   577 
   579   have sf: "sigma_finite_measure (distr M MX X)" by default
   578   have sf: "sigma_finite_measure (distr M MX X)" by default
   580   have ae: "AE x in MX. f x = RN_deriv MX (density MX f) x"
   579   have ae: "AE x in MX. f x = RN_deriv MX (density MX f) x"
   581     using D
   580     using D
   582     by (intro RN_deriv_unique_sigma_finite)
   581     by (intro RN_deriv_unique_sigma_finite)
   583        (auto intro: divide_nonneg_nonneg measure_nonneg
   582        (auto intro: divide_nonneg_nonneg measure_nonneg
   584              simp: distributed_distr_eq_density[symmetric, OF X] sf)
   583              simp: distributed_distr_eq_density[symmetric, OF X] sf)
   585   show "AE x in distr M MX X. log b (real (RN_deriv MX (distr M MX X) x)) =
   584 
       
   585   have ae_eq: "AE x in distr M MX X. log b (real (RN_deriv MX (distr M MX X) x)) =
   586     log b (f x)"
   586     log b (f x)"
   587     unfolding distributed_distr_eq_density[OF X]
   587     unfolding distributed_distr_eq_density[OF X]
   588     apply (subst AE_density)
   588     apply (subst AE_density)
   589     using D apply simp
   589     using D apply simp
   590     using ae apply eventually_elim
   590     using ae apply eventually_elim
   591     apply (subst (asm) eq_commute)
   591     apply (subst (asm) eq_commute)
   592     apply auto
   592     apply auto
   593     done
   593     done
   594   show "- (\<integral> x. log b (f x) \<partial>distr M MX X) = - (\<integral> x. f x * log b (f x) \<partial>MX)"
   594 
       
   595   have int_eq: "- (\<integral> x. log b (f x) \<partial>distr M MX X) = - (\<integral> x. f x * log b (f x) \<partial>MX)"
   595     unfolding distributed_distr_eq_density[OF X]
   596     unfolding distributed_distr_eq_density[OF X]
   596     using D
   597     using D
   597     by (subst integral_density)
   598     by (subst integral_density)
   598        (auto simp: borel_measurable_ereal_iff)
   599        (auto simp: borel_measurable_ereal_iff)
   599 qed 
   600 
       
   601   show ?eq
       
   602     unfolding entropy_def KL_divergence_def entropy_density_def comp_def
       
   603     apply (subst integral_cong_AE)
       
   604     apply (rule ae_eq)
       
   605     apply (rule int_eq)
       
   606     done
       
   607 qed
   600 
   608 
   601 lemma (in prob_space) distributed_imp_emeasure_nonzero:
   609 lemma (in prob_space) distributed_imp_emeasure_nonzero:
   602   assumes X: "distributed M MX X Px"
   610   assumes X: "distributed M MX X Px"
   603   shows "emeasure MX {x \<in> space MX. Px x \<noteq> 0} \<noteq> 0"
   611   shows "emeasure MX {x \<in> space MX. Px x \<noteq> 0} \<noteq> 0"
   604 proof
   612 proof
   894   have aeX1: "AE x in T \<Otimes>\<^isub>M P. Pz (snd x) = 0 \<longrightarrow> Pyz x = 0" by (auto simp: comp_def)
   902   have aeX1: "AE x in T \<Otimes>\<^isub>M P. Pz (snd x) = 0 \<longrightarrow> Pyz x = 0" by (auto simp: comp_def)
   895   have aeX2: "AE x in T \<Otimes>\<^isub>M P. 0 \<le> Pz (snd x)"
   903   have aeX2: "AE x in T \<Otimes>\<^isub>M P. 0 \<le> Pz (snd x)"
   896     using Pz by (intro TP.AE_pair_measure) (auto simp: comp_def intro!: measurable_snd'' dest: distributed_real_AE distributed_real_measurable)
   904     using Pz by (intro TP.AE_pair_measure) (auto simp: comp_def intro!: measurable_snd'' dest: distributed_real_AE distributed_real_measurable)
   897 
   905 
   898   have aeX3: "AE y in T \<Otimes>\<^isub>M P. (\<integral>\<^isup>+ x. ereal (Pxz (x, snd y)) \<partial>S) = ereal (Pz (snd y))"
   906   have aeX3: "AE y in T \<Otimes>\<^isub>M P. (\<integral>\<^isup>+ x. ereal (Pxz (x, snd y)) \<partial>S) = ereal (Pz (snd y))"
   899     using Pz distributed_marginal_eq_joint[OF P S Px Pz Pxz]
   907     using Pz distributed_marginal_eq_joint2[OF P S Pz Pxz]
   900     apply (intro TP.AE_pair_measure)
   908     apply (intro TP.AE_pair_measure)
   901     apply (auto simp: comp_def measurable_split_conv
   909     apply (auto simp: comp_def measurable_split_conv
   902                 intro!: measurable_snd'' borel_measurable_ereal_eq borel_measurable_ereal
   910                 intro!: measurable_snd'' borel_measurable_ereal_eq borel_measurable_ereal
   903                         SP.borel_measurable_positive_integral_snd measurable_compose[OF _ Pxz[THEN distributed_real_measurable]]
   911                         SP.borel_measurable_positive_integral_snd measurable_compose[OF _ Pxz[THEN distributed_real_measurable]]
   904                         measurable_Pair
   912                         measurable_Pair
  1108 
  1116 
  1109 abbreviation (in information_space)
  1117 abbreviation (in information_space)
  1110   conditional_entropy_Pow ("\<H>'(_ | _')") where
  1118   conditional_entropy_Pow ("\<H>'(_ | _')") where
  1111   "\<H>(X | Y) \<equiv> conditional_entropy b (count_space (X`space M)) (count_space (Y`space M)) X Y"
  1119   "\<H>(X | Y) \<equiv> conditional_entropy b (count_space (X`space M)) (count_space (Y`space M)) X Y"
  1112 
  1120 
  1113 lemma (in information_space) conditional_entropy_generic_eq:
  1121 lemma (in information_space)
  1114   fixes Px :: "'b \<Rightarrow> real" and Py :: "'c \<Rightarrow> real"
  1122   fixes Px :: "'b \<Rightarrow> real" and Py :: "'c \<Rightarrow> real"
  1115   assumes S: "sigma_finite_measure S" and T: "sigma_finite_measure T"
  1123   assumes S: "sigma_finite_measure S" and T: "sigma_finite_measure T"
  1116   assumes Px: "distributed M S X Px"
  1124   assumes Px: "distributed M S X Px"
  1117   assumes Py: "distributed M T Y Py"
  1125   assumes Py: "distributed M T Y Py"
  1118   assumes Pxy: "distributed M (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x)) Pxy"
  1126   assumes Pxy: "distributed M (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x)) Pxy"
  1119   assumes I1: "integrable (S \<Otimes>\<^isub>M T) (\<lambda>x. Pxy x * log b (Pxy x))"
  1127   assumes I1: "integrable (S \<Otimes>\<^isub>M T) (\<lambda>x. Pxy x * log b (Pxy x))"
  1120   assumes I2: "integrable (S \<Otimes>\<^isub>M T) (\<lambda>x. Pxy x * log b (Py (snd x)))"
  1128   assumes I2: "integrable (S \<Otimes>\<^isub>M T) (\<lambda>x. Pxy x * log b (Py (snd x)))"
  1121   shows "conditional_entropy b S T X Y = - (\<integral>(x, y). Pxy (x, y) * log b (Pxy (x, y) / Py y) \<partial>(S \<Otimes>\<^isub>M T))"
  1129   shows conditional_entropy_generic_eq: "conditional_entropy b S T X Y = - (\<integral>(x, y). Pxy (x, y) * log b (Pxy (x, y) / Py y) \<partial>(S \<Otimes>\<^isub>M T))" (is ?eq)
  1122 proof -
  1130 proof -
  1123   interpret S: sigma_finite_measure S by fact
  1131   interpret S: sigma_finite_measure S by fact
  1124   interpret T: sigma_finite_measure T by fact
  1132   interpret T: sigma_finite_measure T by fact
  1125   interpret ST: pair_sigma_finite S T ..
  1133   interpret ST: pair_sigma_finite S T ..
  1126   have ST: "sigma_finite_measure (S \<Otimes>\<^isub>M T)" ..
  1134   have ST: "sigma_finite_measure (S \<Otimes>\<^isub>M T)" ..
  1127 
  1135 
  1128   interpret Pxy: prob_space "density (S \<Otimes>\<^isub>M T) Pxy"
  1136   let ?P = "density (S \<Otimes>\<^isub>M T) Pxy"
       
  1137   interpret Pxy: prob_space ?P
  1129     unfolding Pxy[THEN distributed_distr_eq_density, symmetric]
  1138     unfolding Pxy[THEN distributed_distr_eq_density, symmetric]
  1130     using Pxy[THEN distributed_measurable] by (rule prob_space_distr)
  1139     using Pxy[THEN distributed_measurable] by (rule prob_space_distr)
  1131 
  1140 
  1132   from Py Pxy have distr_eq: "distr M T Y =
  1141   from Py Pxy have distr_eq: "distr M T Y =
  1133     distr (distr M (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x))) T snd"
  1142     distr (distr M (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x))) T snd"
  1138   also have "\<dots> = - (\<integral>(x,y). Pxy (x,y) * log b (Py y) \<partial>(S \<Otimes>\<^isub>M T))"
  1147   also have "\<dots> = - (\<integral>(x,y). Pxy (x,y) * log b (Py y) \<partial>(S \<Otimes>\<^isub>M T))"
  1139     using b_gt_1 Py[THEN distributed_real_measurable]
  1148     using b_gt_1 Py[THEN distributed_real_measurable]
  1140     by (subst distributed_transform_integral[OF Pxy Py, where T=snd]) (auto intro!: integral_cong)
  1149     by (subst distributed_transform_integral[OF Pxy Py, where T=snd]) (auto intro!: integral_cong)
  1141   finally have e_eq: "entropy b T Y = - (\<integral>(x,y). Pxy (x,y) * log b (Py y) \<partial>(S \<Otimes>\<^isub>M T))" .
  1150   finally have e_eq: "entropy b T Y = - (\<integral>(x,y). Pxy (x,y) * log b (Py y) \<partial>(S \<Otimes>\<^isub>M T))" .
  1142   
  1151   
  1143   have "AE x in S \<Otimes>\<^isub>M T. Px (fst x) = 0 \<longrightarrow> Pxy x = 0"
  1152   have ae1: "AE x in S \<Otimes>\<^isub>M T. Px (fst x) = 0 \<longrightarrow> Pxy x = 0"
  1144     by (intro subdensity_real[of fst, OF _ Pxy Px]) (auto intro: measurable_Pair)
  1153     by (intro subdensity_real[of fst, OF _ Pxy Px]) (auto intro: measurable_Pair)
  1145   moreover have "AE x in S \<Otimes>\<^isub>M T. Py (snd x) = 0 \<longrightarrow> Pxy x = 0"
  1154   moreover have ae2: "AE x in S \<Otimes>\<^isub>M T. Py (snd x) = 0 \<longrightarrow> Pxy x = 0"
  1146     by (intro subdensity_real[of snd, OF _ Pxy Py]) (auto intro: measurable_Pair)
  1155     by (intro subdensity_real[of snd, OF _ Pxy Py]) (auto intro: measurable_Pair)
  1147   moreover have "AE x in S \<Otimes>\<^isub>M T. 0 \<le> Px (fst x)"
  1156   moreover have ae3: "AE x in S \<Otimes>\<^isub>M T. 0 \<le> Px (fst x)"
  1148     using Px by (intro ST.AE_pair_measure) (auto simp: comp_def intro!: measurable_fst'' dest: distributed_real_AE distributed_real_measurable)
  1157     using Px by (intro ST.AE_pair_measure) (auto simp: comp_def intro!: measurable_fst'' dest: distributed_real_AE distributed_real_measurable)
  1149   moreover have "AE x in S \<Otimes>\<^isub>M T. 0 \<le> Py (snd x)"
  1158   moreover have ae4: "AE x in S \<Otimes>\<^isub>M T. 0 \<le> Py (snd x)"
  1150     using Py by (intro ST.AE_pair_measure) (auto simp: comp_def intro!: measurable_snd'' dest: distributed_real_AE distributed_real_measurable)
  1159     using Py by (intro ST.AE_pair_measure) (auto simp: comp_def intro!: measurable_snd'' dest: distributed_real_AE distributed_real_measurable)
  1151   moreover note Pxy[THEN distributed_real_AE]
  1160   moreover note ae5 = Pxy[THEN distributed_real_AE]
  1152   ultimately have pos: "AE x in S \<Otimes>\<^isub>M T. 0 \<le> Pxy x \<and> 0 \<le> Px (fst x) \<and> 0 \<le> Py (snd x) \<and>
  1161   ultimately have pos: "AE x in S \<Otimes>\<^isub>M T. 0 \<le> Pxy x \<and> 0 \<le> Px (fst x) \<and> 0 \<le> Py (snd x) \<and>
  1153     (Pxy x = 0 \<or> (Pxy x \<noteq> 0 \<longrightarrow> 0 < Pxy x \<and> 0 < Px (fst x) \<and> 0 < Py (snd x)))"
  1162     (Pxy x = 0 \<or> (Pxy x \<noteq> 0 \<longrightarrow> 0 < Pxy x \<and> 0 < Px (fst x) \<and> 0 < Py (snd x)))"
  1154     by eventually_elim auto
  1163     by eventually_elim auto
  1155 
  1164 
  1156   from pos have "AE x in S \<Otimes>\<^isub>M T.
  1165   from pos have ae: "AE x in S \<Otimes>\<^isub>M T.
  1157      Pxy x * log b (Pxy x) - Pxy x * log b (Py (snd x)) = Pxy x * log b (Pxy x / Py (snd x))"
  1166      Pxy x * log b (Pxy x) - Pxy x * log b (Py (snd x)) = Pxy x * log b (Pxy x / Py (snd x))"
  1158     by eventually_elim (auto simp: log_simps mult_pos_pos field_simps b_gt_1)
  1167     by eventually_elim (auto simp: log_simps mult_pos_pos field_simps b_gt_1)
  1159   with I1 I2 show ?thesis
  1168   with I1 I2 show ?eq
  1160     unfolding conditional_entropy_def
  1169     unfolding conditional_entropy_def
  1161     apply (subst e_eq)
  1170     apply (subst e_eq)
  1162     apply (subst entropy_distr[OF Pxy])
  1171     apply (subst entropy_distr[OF Pxy])
  1163     unfolding minus_diff_minus
  1172     unfolding minus_diff_minus
  1164     apply (subst integral_diff(2)[symmetric])
  1173     apply (subst integral_diff(2)[symmetric])