564 |
564 |
565 abbreviation (in information_space) |
565 abbreviation (in information_space) |
566 entropy_Pow ("\<H>'(_')") where |
566 entropy_Pow ("\<H>'(_')") where |
567 "\<H>(X) \<equiv> entropy b (count_space (X`space M)) X" |
567 "\<H>(X) \<equiv> entropy b (count_space (X`space M)) X" |
568 |
568 |
569 lemma (in information_space) entropy_distr: |
569 lemma (in information_space) |
570 fixes X :: "'a \<Rightarrow> 'b" |
570 fixes X :: "'a \<Rightarrow> 'b" |
571 assumes X: "distributed M MX X f" |
571 assumes X: "distributed M MX X f" |
572 shows "entropy b MX X = - (\<integral>x. f x * log b (f x) \<partial>MX)" |
572 shows entropy_distr: "entropy b MX X = - (\<integral>x. f x * log b (f x) \<partial>MX)" (is ?eq) |
573 unfolding entropy_def KL_divergence_def entropy_density_def comp_def |
573 proof - |
574 proof (subst integral_cong_AE) |
|
575 note D = distributed_measurable[OF X] distributed_borel_measurable[OF X] distributed_AE[OF X] |
574 note D = distributed_measurable[OF X] distributed_borel_measurable[OF X] distributed_AE[OF X] |
576 interpret X: prob_space "distr M MX X" |
575 interpret X: prob_space "distr M MX X" |
577 using D(1) by (rule prob_space_distr) |
576 using D(1) by (rule prob_space_distr) |
578 |
577 |
579 have sf: "sigma_finite_measure (distr M MX X)" by default |
578 have sf: "sigma_finite_measure (distr M MX X)" by default |
580 have ae: "AE x in MX. f x = RN_deriv MX (density MX f) x" |
579 have ae: "AE x in MX. f x = RN_deriv MX (density MX f) x" |
581 using D |
580 using D |
582 by (intro RN_deriv_unique_sigma_finite) |
581 by (intro RN_deriv_unique_sigma_finite) |
583 (auto intro: divide_nonneg_nonneg measure_nonneg |
582 (auto intro: divide_nonneg_nonneg measure_nonneg |
584 simp: distributed_distr_eq_density[symmetric, OF X] sf) |
583 simp: distributed_distr_eq_density[symmetric, OF X] sf) |
585 show "AE x in distr M MX X. log b (real (RN_deriv MX (distr M MX X) x)) = |
584 |
|
585 have ae_eq: "AE x in distr M MX X. log b (real (RN_deriv MX (distr M MX X) x)) = |
586 log b (f x)" |
586 log b (f x)" |
587 unfolding distributed_distr_eq_density[OF X] |
587 unfolding distributed_distr_eq_density[OF X] |
588 apply (subst AE_density) |
588 apply (subst AE_density) |
589 using D apply simp |
589 using D apply simp |
590 using ae apply eventually_elim |
590 using ae apply eventually_elim |
591 apply (subst (asm) eq_commute) |
591 apply (subst (asm) eq_commute) |
592 apply auto |
592 apply auto |
593 done |
593 done |
594 show "- (\<integral> x. log b (f x) \<partial>distr M MX X) = - (\<integral> x. f x * log b (f x) \<partial>MX)" |
594 |
|
595 have int_eq: "- (\<integral> x. log b (f x) \<partial>distr M MX X) = - (\<integral> x. f x * log b (f x) \<partial>MX)" |
595 unfolding distributed_distr_eq_density[OF X] |
596 unfolding distributed_distr_eq_density[OF X] |
596 using D |
597 using D |
597 by (subst integral_density) |
598 by (subst integral_density) |
598 (auto simp: borel_measurable_ereal_iff) |
599 (auto simp: borel_measurable_ereal_iff) |
599 qed |
600 |
|
601 show ?eq |
|
602 unfolding entropy_def KL_divergence_def entropy_density_def comp_def |
|
603 apply (subst integral_cong_AE) |
|
604 apply (rule ae_eq) |
|
605 apply (rule int_eq) |
|
606 done |
|
607 qed |
600 |
608 |
601 lemma (in prob_space) distributed_imp_emeasure_nonzero: |
609 lemma (in prob_space) distributed_imp_emeasure_nonzero: |
602 assumes X: "distributed M MX X Px" |
610 assumes X: "distributed M MX X Px" |
603 shows "emeasure MX {x \<in> space MX. Px x \<noteq> 0} \<noteq> 0" |
611 shows "emeasure MX {x \<in> space MX. Px x \<noteq> 0} \<noteq> 0" |
604 proof |
612 proof |
894 have aeX1: "AE x in T \<Otimes>\<^isub>M P. Pz (snd x) = 0 \<longrightarrow> Pyz x = 0" by (auto simp: comp_def) |
902 have aeX1: "AE x in T \<Otimes>\<^isub>M P. Pz (snd x) = 0 \<longrightarrow> Pyz x = 0" by (auto simp: comp_def) |
895 have aeX2: "AE x in T \<Otimes>\<^isub>M P. 0 \<le> Pz (snd x)" |
903 have aeX2: "AE x in T \<Otimes>\<^isub>M P. 0 \<le> Pz (snd x)" |
896 using Pz by (intro TP.AE_pair_measure) (auto simp: comp_def intro!: measurable_snd'' dest: distributed_real_AE distributed_real_measurable) |
904 using Pz by (intro TP.AE_pair_measure) (auto simp: comp_def intro!: measurable_snd'' dest: distributed_real_AE distributed_real_measurable) |
897 |
905 |
898 have aeX3: "AE y in T \<Otimes>\<^isub>M P. (\<integral>\<^isup>+ x. ereal (Pxz (x, snd y)) \<partial>S) = ereal (Pz (snd y))" |
906 have aeX3: "AE y in T \<Otimes>\<^isub>M P. (\<integral>\<^isup>+ x. ereal (Pxz (x, snd y)) \<partial>S) = ereal (Pz (snd y))" |
899 using Pz distributed_marginal_eq_joint[OF P S Px Pz Pxz] |
907 using Pz distributed_marginal_eq_joint2[OF P S Pz Pxz] |
900 apply (intro TP.AE_pair_measure) |
908 apply (intro TP.AE_pair_measure) |
901 apply (auto simp: comp_def measurable_split_conv |
909 apply (auto simp: comp_def measurable_split_conv |
902 intro!: measurable_snd'' borel_measurable_ereal_eq borel_measurable_ereal |
910 intro!: measurable_snd'' borel_measurable_ereal_eq borel_measurable_ereal |
903 SP.borel_measurable_positive_integral_snd measurable_compose[OF _ Pxz[THEN distributed_real_measurable]] |
911 SP.borel_measurable_positive_integral_snd measurable_compose[OF _ Pxz[THEN distributed_real_measurable]] |
904 measurable_Pair |
912 measurable_Pair |
1108 |
1116 |
1109 abbreviation (in information_space) |
1117 abbreviation (in information_space) |
1110 conditional_entropy_Pow ("\<H>'(_ | _')") where |
1118 conditional_entropy_Pow ("\<H>'(_ | _')") where |
1111 "\<H>(X | Y) \<equiv> conditional_entropy b (count_space (X`space M)) (count_space (Y`space M)) X Y" |
1119 "\<H>(X | Y) \<equiv> conditional_entropy b (count_space (X`space M)) (count_space (Y`space M)) X Y" |
1112 |
1120 |
1113 lemma (in information_space) conditional_entropy_generic_eq: |
1121 lemma (in information_space) |
1114 fixes Px :: "'b \<Rightarrow> real" and Py :: "'c \<Rightarrow> real" |
1122 fixes Px :: "'b \<Rightarrow> real" and Py :: "'c \<Rightarrow> real" |
1115 assumes S: "sigma_finite_measure S" and T: "sigma_finite_measure T" |
1123 assumes S: "sigma_finite_measure S" and T: "sigma_finite_measure T" |
1116 assumes Px: "distributed M S X Px" |
1124 assumes Px: "distributed M S X Px" |
1117 assumes Py: "distributed M T Y Py" |
1125 assumes Py: "distributed M T Y Py" |
1118 assumes Pxy: "distributed M (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x)) Pxy" |
1126 assumes Pxy: "distributed M (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x)) Pxy" |
1119 assumes I1: "integrable (S \<Otimes>\<^isub>M T) (\<lambda>x. Pxy x * log b (Pxy x))" |
1127 assumes I1: "integrable (S \<Otimes>\<^isub>M T) (\<lambda>x. Pxy x * log b (Pxy x))" |
1120 assumes I2: "integrable (S \<Otimes>\<^isub>M T) (\<lambda>x. Pxy x * log b (Py (snd x)))" |
1128 assumes I2: "integrable (S \<Otimes>\<^isub>M T) (\<lambda>x. Pxy x * log b (Py (snd x)))" |
1121 shows "conditional_entropy b S T X Y = - (\<integral>(x, y). Pxy (x, y) * log b (Pxy (x, y) / Py y) \<partial>(S \<Otimes>\<^isub>M T))" |
1129 shows conditional_entropy_generic_eq: "conditional_entropy b S T X Y = - (\<integral>(x, y). Pxy (x, y) * log b (Pxy (x, y) / Py y) \<partial>(S \<Otimes>\<^isub>M T))" (is ?eq) |
1122 proof - |
1130 proof - |
1123 interpret S: sigma_finite_measure S by fact |
1131 interpret S: sigma_finite_measure S by fact |
1124 interpret T: sigma_finite_measure T by fact |
1132 interpret T: sigma_finite_measure T by fact |
1125 interpret ST: pair_sigma_finite S T .. |
1133 interpret ST: pair_sigma_finite S T .. |
1126 have ST: "sigma_finite_measure (S \<Otimes>\<^isub>M T)" .. |
1134 have ST: "sigma_finite_measure (S \<Otimes>\<^isub>M T)" .. |
1127 |
1135 |
1128 interpret Pxy: prob_space "density (S \<Otimes>\<^isub>M T) Pxy" |
1136 let ?P = "density (S \<Otimes>\<^isub>M T) Pxy" |
|
1137 interpret Pxy: prob_space ?P |
1129 unfolding Pxy[THEN distributed_distr_eq_density, symmetric] |
1138 unfolding Pxy[THEN distributed_distr_eq_density, symmetric] |
1130 using Pxy[THEN distributed_measurable] by (rule prob_space_distr) |
1139 using Pxy[THEN distributed_measurable] by (rule prob_space_distr) |
1131 |
1140 |
1132 from Py Pxy have distr_eq: "distr M T Y = |
1141 from Py Pxy have distr_eq: "distr M T Y = |
1133 distr (distr M (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x))) T snd" |
1142 distr (distr M (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x))) T snd" |
1138 also have "\<dots> = - (\<integral>(x,y). Pxy (x,y) * log b (Py y) \<partial>(S \<Otimes>\<^isub>M T))" |
1147 also have "\<dots> = - (\<integral>(x,y). Pxy (x,y) * log b (Py y) \<partial>(S \<Otimes>\<^isub>M T))" |
1139 using b_gt_1 Py[THEN distributed_real_measurable] |
1148 using b_gt_1 Py[THEN distributed_real_measurable] |
1140 by (subst distributed_transform_integral[OF Pxy Py, where T=snd]) (auto intro!: integral_cong) |
1149 by (subst distributed_transform_integral[OF Pxy Py, where T=snd]) (auto intro!: integral_cong) |
1141 finally have e_eq: "entropy b T Y = - (\<integral>(x,y). Pxy (x,y) * log b (Py y) \<partial>(S \<Otimes>\<^isub>M T))" . |
1150 finally have e_eq: "entropy b T Y = - (\<integral>(x,y). Pxy (x,y) * log b (Py y) \<partial>(S \<Otimes>\<^isub>M T))" . |
1142 |
1151 |
1143 have "AE x in S \<Otimes>\<^isub>M T. Px (fst x) = 0 \<longrightarrow> Pxy x = 0" |
1152 have ae1: "AE x in S \<Otimes>\<^isub>M T. Px (fst x) = 0 \<longrightarrow> Pxy x = 0" |
1144 by (intro subdensity_real[of fst, OF _ Pxy Px]) (auto intro: measurable_Pair) |
1153 by (intro subdensity_real[of fst, OF _ Pxy Px]) (auto intro: measurable_Pair) |
1145 moreover have "AE x in S \<Otimes>\<^isub>M T. Py (snd x) = 0 \<longrightarrow> Pxy x = 0" |
1154 moreover have ae2: "AE x in S \<Otimes>\<^isub>M T. Py (snd x) = 0 \<longrightarrow> Pxy x = 0" |
1146 by (intro subdensity_real[of snd, OF _ Pxy Py]) (auto intro: measurable_Pair) |
1155 by (intro subdensity_real[of snd, OF _ Pxy Py]) (auto intro: measurable_Pair) |
1147 moreover have "AE x in S \<Otimes>\<^isub>M T. 0 \<le> Px (fst x)" |
1156 moreover have ae3: "AE x in S \<Otimes>\<^isub>M T. 0 \<le> Px (fst x)" |
1148 using Px by (intro ST.AE_pair_measure) (auto simp: comp_def intro!: measurable_fst'' dest: distributed_real_AE distributed_real_measurable) |
1157 using Px by (intro ST.AE_pair_measure) (auto simp: comp_def intro!: measurable_fst'' dest: distributed_real_AE distributed_real_measurable) |
1149 moreover have "AE x in S \<Otimes>\<^isub>M T. 0 \<le> Py (snd x)" |
1158 moreover have ae4: "AE x in S \<Otimes>\<^isub>M T. 0 \<le> Py (snd x)" |
1150 using Py by (intro ST.AE_pair_measure) (auto simp: comp_def intro!: measurable_snd'' dest: distributed_real_AE distributed_real_measurable) |
1159 using Py by (intro ST.AE_pair_measure) (auto simp: comp_def intro!: measurable_snd'' dest: distributed_real_AE distributed_real_measurable) |
1151 moreover note Pxy[THEN distributed_real_AE] |
1160 moreover note ae5 = Pxy[THEN distributed_real_AE] |
1152 ultimately have pos: "AE x in S \<Otimes>\<^isub>M T. 0 \<le> Pxy x \<and> 0 \<le> Px (fst x) \<and> 0 \<le> Py (snd x) \<and> |
1161 ultimately have pos: "AE x in S \<Otimes>\<^isub>M T. 0 \<le> Pxy x \<and> 0 \<le> Px (fst x) \<and> 0 \<le> Py (snd x) \<and> |
1153 (Pxy x = 0 \<or> (Pxy x \<noteq> 0 \<longrightarrow> 0 < Pxy x \<and> 0 < Px (fst x) \<and> 0 < Py (snd x)))" |
1162 (Pxy x = 0 \<or> (Pxy x \<noteq> 0 \<longrightarrow> 0 < Pxy x \<and> 0 < Px (fst x) \<and> 0 < Py (snd x)))" |
1154 by eventually_elim auto |
1163 by eventually_elim auto |
1155 |
1164 |
1156 from pos have "AE x in S \<Otimes>\<^isub>M T. |
1165 from pos have ae: "AE x in S \<Otimes>\<^isub>M T. |
1157 Pxy x * log b (Pxy x) - Pxy x * log b (Py (snd x)) = Pxy x * log b (Pxy x / Py (snd x))" |
1166 Pxy x * log b (Pxy x) - Pxy x * log b (Py (snd x)) = Pxy x * log b (Pxy x / Py (snd x))" |
1158 by eventually_elim (auto simp: log_simps mult_pos_pos field_simps b_gt_1) |
1167 by eventually_elim (auto simp: log_simps mult_pos_pos field_simps b_gt_1) |
1159 with I1 I2 show ?thesis |
1168 with I1 I2 show ?eq |
1160 unfolding conditional_entropy_def |
1169 unfolding conditional_entropy_def |
1161 apply (subst e_eq) |
1170 apply (subst e_eq) |
1162 apply (subst entropy_distr[OF Pxy]) |
1171 apply (subst entropy_distr[OF Pxy]) |
1163 unfolding minus_diff_minus |
1172 unfolding minus_diff_minus |
1164 apply (subst integral_diff(2)[symmetric]) |
1173 apply (subst integral_diff(2)[symmetric]) |