diff -r b0d590e75592 -r 5c691b178e08 src/HOL/Probability/Information.thy --- a/src/HOL/Probability/Information.thy Fri Jul 05 21:59:43 2024 +0100 +++ b/src/HOL/Probability/Information.thy Sat Jul 06 12:51:31 2024 +0100 @@ -10,47 +10,22 @@ Independent_Family begin + +lemma log_mult_nz: + "x\0 \ y\0 \ log a (x * y) = log a x + log a y" + by (simp add: log_def ln_mult divide_inverse distrib_right) + +lemma log_divide_nz: + "x\0 \ y\0 \ log a (x / y) = log a x - log a y" + by (simp add: diff_divide_distrib ln_div log_def) + subsection "Information theory" locale information_space = prob_space + fixes b :: real assumes b_gt_1: "1 < b" -context information_space -begin - text \Introduce some simplification rules for logarithm of base \<^term>\b\.\ - -lemma log_neg_const: - assumes "x \ 0" - shows "log b x = log b 0" -proof - - { fix u :: real - have "x \ 0" by fact - also have "0 < exp u" - using exp_gt_zero . - finally have "exp u \ x" - by auto } - then show "log b x = log b 0" - by (simp add: log_def ln_real_def) -qed - -lemma log_mult_eq: - "log b (A * B) = (if 0 < A * B then log b \A\ + log b \B\ else log b 0)" - using log_mult[of "\A\" "\B\"] b_gt_1 log_neg_const[of "A * B"] - by (auto simp: zero_less_mult_iff mult_le_0_iff) - -lemma log_inverse_eq: - "log b (inverse B) = (if 0 < B then - log b B else log b 0)" - using ln_inverse log_def log_neg_const by force - -lemma log_divide_eq: - "log b (A / B) = (if 0 < A * B then (log b \A\) - log b \B\ else log b 0)" - unfolding divide_inverse log_mult_eq log_inverse_eq abs_inverse - by (auto simp: zero_less_mult_iff mult_le_0_iff) - -lemmas log_simps = log_mult_eq log_inverse_eq log_divide_eq - -end +lemmas log_simps = log_mult log_inverse log_divide subsection "Kullback$-$Leibler divergence" @@ -575,10 +550,16 @@ by (auto intro: distributed_real_measurable measurable_fst'' measurable_snd'' intro!: borel_measurable_times borel_measurable_log borel_measurable_divide) ultimately have int: "integrable (S \\<^sub>M T) f" - apply (rule integrable_cong_AE_imp) - using A B - by (auto simp: f_def log_divide_eq log_mult_eq field_simps space_pair_measure Px_nn Py_nn Pxy_nn - less_le) + proof (rule integrable_cong_AE_imp) + show "AE x in S \\<^sub>M + T. Pxy x * log b (Pxy x) - Pxy x * log b (Px (fst x)) - Pxy x * log b (Py (snd x)) + = f x" + using A B unfolding f_def + + apply (auto simp: f_def field_simps space_pair_measure Px_nn Py_nn Pxy_nn + less_le ) + by (smt (verit, del_insts) AE_I2 distrib_left log_divide log_mult mult_eq_0_iff) + qed show "0 \ ?M" unfolding M proof (intro ST.KL_density_density_nonneg) @@ -828,7 +809,7 @@ have "integrable MX (\x. Px x * log b (1 / Px x)) = integrable MX (\x. - Px x * log b (Px x))" using Px - by (intro integrable_cong_AE) (auto simp: log_divide_eq less_le) + by (intro integrable_cong_AE) (auto simp: log_divide_pos log_recip) then show "integrable (distr M MX X) (\x. - log b (1 / Px x))" unfolding distributed_distr_eq_density[OF X] using Px int @@ -836,7 +817,7 @@ qed (auto simp: minus_log_convex[OF b_gt_1]) also have "\ = (\ x. log b (Px x) \distr M MX X)" unfolding distributed_distr_eq_density[OF X] using Px - by (intro integral_cong_AE) (auto simp: AE_density log_divide_eq) + by (intro integral_cong_AE) (auto simp: AE_density log_divide_pos) also have "\ = - entropy b MX X" unfolding distributed_distr_eq_density[OF X] using Px by (subst entropy_distr[OF X]) (auto simp: integral_density) @@ -872,7 +853,7 @@ have eq: "(\ x. indicator A x / measure MX A * log b (indicator A x / measure MX A) \MX) = (\ x. (- log b (measure MX A) / measure MX A) * indicator A x \MX)" using uniform_distributed_params[OF X] - by (intro Bochner_Integration.integral_cong) (auto split: split_indicator simp: log_divide_eq zero_less_measure_iff) + by (intro Bochner_Integration.integral_cong) (auto split: split_indicator simp: log_divide_pos zero_less_measure_iff) show "- (\ x. indicator A x / measure MX A * log b (indicator A x / measure MX A) \MX) = log b (measure MX A)" unfolding eq using uniform_distributed_params[OF X] @@ -1116,9 +1097,9 @@ have "integrable (S \\<^sub>M T \\<^sub>M P) (\x. Pxyz x * - log b (?f x))" apply (rule integrable_cong_AE[THEN iffD1, OF _ _ _ I3]) using ae1 ae2 ae3 ae4 - apply (auto simp: log_divide_eq log_mult_eq zero_le_mult_iff zero_less_mult_iff zero_less_divide_iff field_simps - less_le space_pair_measure) - done + apply (auto simp: log_divide log_mult zero_le_mult_iff zero_less_mult_iff zero_less_divide_iff field_simps + less_le space_pair_measure split: prod.split) + by (smt (verit, best) AE_I2 fst_conv log_divide mult_cancel_left mult_minus_right snd_conv) then show "integrable ?P (\x. - log b (?f x))" by (subst integrable_real_density) (auto simp: space_pair_measure) @@ -1131,9 +1112,9 @@ apply simp apply (intro integral_cong_AE) using ae1 ae2 ae3 ae4 - apply (auto simp: log_divide_eq zero_less_mult_iff zero_less_divide_iff field_simps - space_pair_measure less_le) - done + apply (auto simp: zero_less_mult_iff zero_less_divide_iff field_simps + space_pair_measure less_le split: prod.split) + by (smt (verit, best) AE_I2 fst_conv log_divide mult_eq_0_iff mult_minus_right snd_conv) finally show ?nonneg by simp qed @@ -1233,8 +1214,10 @@ ultimately have I1: "integrable (S \\<^sub>M T \\<^sub>M P) (\(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Px x * Pyz (y, z))))" apply (rule integrable_cong_AE_imp) using ae1 ae4 Px_nn Pyz_nn Pxyz_nn - by (auto simp: log_divide_eq log_mult_eq field_simps zero_less_mult_iff space_pair_measure less_le) - + apply (auto simp: log_divide log_mult field_simps zero_less_mult_iff space_pair_measure less_le + split: prod.split) + apply (intro AE_I2) + by (smt (verit, best) distrib_left divide_eq_0_iff fst_conv log_mult mult.commute nonzero_mult_div_cancel_left snd_conv times_divide_eq_right) have "integrable (S \\<^sub>M T \\<^sub>M P) (\x. Pxyz x * log b (Pxz (fst x, snd (snd x))) - Pxyz x * log b (Px (fst x)) - Pxyz x * log b (Pz (snd (snd x))))" using finite_entropy_integrable_transform[OF Fxz Pxyz Pxyz_nn, of "\x. (fst x, snd (snd x))"] @@ -1247,7 +1230,41 @@ ultimately have I2: "integrable (S \\<^sub>M T \\<^sub>M P) (\(x, y, z). Pxyz (x, y, z) * log b (Pxz (x, z) / (Px x * Pz z)))" apply (rule integrable_cong_AE_imp) using ae1 ae2 ae3 ae4 Px_nn Pz_nn Pxz_nn Pyz_nn Pxyz_nn - by (auto simp: log_divide_eq log_mult_eq field_simps zero_less_mult_iff less_le space_pair_measure) + apply(auto simp: field_simps zero_less_mult_iff less_le space_pair_measure split: prod.split) + apply (intro AE_I2) + apply clarify + proof - + fix a :: 'c and aa :: 'd and ba :: 'e and x1 :: 'c and ab :: 'd and baa :: 'e + assume a1: "Pxz (fst (x1, ab, baa), snd (snd (x1, ab, baa))) = 0 \ Pxyz (x1, ab, baa) = 0" + assume a2: "Pz (snd (snd (x1, ab, baa))) = 0 \ Pxyz (x1, ab, baa) = 0" + assume a3: "Px (fst (x1, ab, baa)) = 0 \ Pxyz (x1, ab, baa) = 0" + have f4: "\r ra rb. (ra::real) * (rb + r) = rb * ra + ra * r" + by (simp add: vector_space_over_itself.scale_right_distrib) + have f5: "0 \ Px x1 \ 0 = Pxyz (x1, ab, baa)" + using a3 by auto + have f6: "0 \ Pz baa \ 0 = Pxyz (x1, ab, baa)" + using a2 by force + have f7: "\r. (0::real) = 0 * r" + by simp + have "0 = Px x1 * Pz baa \ Pxyz (x1, ab, baa) * log b (Pxz (x1, baa)) = Pxyz (x1, ab, baa) * (log b (Px x1) + log b (Pz baa) + log b (Pxz (x1, baa) / (Px x1 * Pz baa)))" + using f6 f5 by simp + moreover + { assume "0 \ Px x1 * Pz baa" + moreover + { assume "0 \ Px x1 * Pz baa \ 0 \ Pxyz (x1, ab, baa)" + then have "Pxyz (x1, ab, baa) * log b (Pxz (x1, baa)) = Pxyz (x1, ab, baa) * (log b (Px x1 * Pz baa) + log b (Pxz (x1, baa) / (Px x1 * Pz baa))) \ 0 \ Pxz (x1, baa) / (Px x1 * Pz baa)" + using a1 by (metis (no_types) divide_eq_0_iff fst_eqD log_mult_nz mult.commute nonzero_eq_divide_eq snd_eqD) + moreover + { assume "Pxyz (x1, ab, baa) * log b (Pxz (x1, baa)) = Pxyz (x1, ab, baa) * (log b (Px x1) + (log b 0 + log b 0)) \ 0 \ Pxz (x1, baa) / (Px x1 * Pz baa) \ Pxyz (x1, ab, baa) * log b (Pxz (x1, baa)) = log b (Px x1) * Pxyz (x1, ab, baa) + Pxyz (x1, ab, baa) * log b (Pz baa * (Pxz (x1, baa) / (Px x1 * Pz baa)))" + then have "Pxyz (x1, ab, baa) * log b (Pxz (x1, baa)) = log b (Px x1) * Pxyz (x1, ab, baa) + (log b (Pz baa) * Pxyz (x1, ab, baa) + Pxyz (x1, ab, baa) * log b (Pxz (x1, baa) / (Px x1 * Pz baa)))" + using f7 f4 by (metis (no_types) divide_eq_0_iff log_mult_nz mult.commute) } + ultimately have "Pxyz (x1, ab, baa) * log b (Pxz (x1, baa)) = Pxyz (x1, ab, baa) * (log b (Px x1) + log b (Pz baa) + log b (Pxz (x1, baa) / (Px x1 * Pz baa))) \ Pxyz (x1, ab, baa) * log b (Pxz (x1, baa)) = log b (Px x1) * Pxyz (x1, ab, baa) + (log b (Pz baa) * Pxyz (x1, ab, baa) + Pxyz (x1, ab, baa) * log b (Pxz (x1, baa) / (Px x1 * Pz baa)))" + using f4 by (simp add: log_mult_nz) } + ultimately have "Pxyz (x1, ab, baa) * log b (Pxz (x1, baa)) = Pxyz (x1, ab, baa) * (log b (Px x1) + log b (Pz baa) + log b (Pxz (x1, baa) / (Px x1 * Pz baa))) \ Pxyz (x1, ab, baa) * log b (Pxz (x1, baa)) = Pxyz (x1, ab, baa) * (log b (Px x1) + log b (Pz baa)) + Pxyz (x1, ab, baa) * log b (Pxz (x1, baa) / (Px x1 * Pz baa)) \ Pxyz (x1, ab, baa) * log b (Pxz (x1, baa)) = log b (Px x1) * Pxyz (x1, ab, baa) + (log b (Pz baa) * Pxyz (x1, ab, baa) + Pxyz (x1, ab, baa) * log b (Pxz (x1, baa) / (Px x1 * Pz baa)))" + using f7 by (metis (no_types)) } + ultimately show "Pxyz (x1, ab, baa) * log b (Pxz (x1, baa)) = log b (Px x1) * Pxyz (x1, ab, baa) + (log b (Pz baa) * Pxyz (x1, ab, baa) + Pxyz (x1, ab, baa) * log b (Pxz (x1, baa) / (Px x1 * Pz baa)))" + by argo + qed from ae I1 I2 show ?eq unfolding conditional_mutual_information_def mi_eq @@ -1355,8 +1372,12 @@ have "integrable (S \\<^sub>M T \\<^sub>M P) (\x. Pxyz x * - log b (?f x))" apply (rule integrable_cong_AE[THEN iffD1, OF _ _ _ I3]) using Pz_nn Pxz_nn Pyz_nn Pxyz_nn ae2 ae3 ae4 - by (auto simp: log_divide_eq log_mult_eq zero_le_mult_iff zero_less_mult_iff - zero_less_divide_iff field_simps space_pair_measure less_le) + apply (auto simp: log_divide_nz log_mult_nz zero_le_mult_iff zero_less_mult_iff + zero_less_divide_iff field_simps space_pair_measure less_le split: prod.split) + apply (intro AE_I2) + apply (auto simp: ) + by (smt (verit, best) log_divide minus_mult_minus mult_minus_left no_zero_divisors) + then show "integrable ?P (\x. - log b (?f x))" using Pxyz_nn by (auto simp: integrable_real_density) @@ -1370,9 +1391,10 @@ apply simp apply (intro integral_cong_AE) using ae1 ae2 ae3 ae4 - apply (auto simp: log_divide_eq zero_less_mult_iff zero_less_divide_iff - field_simps space_pair_measure less_le integral_cong_AE) - done + apply (auto simp: log_divide_pos zero_less_mult_iff zero_less_divide_iff + field_simps space_pair_measure less_le integral_cong_AE split: prod.split) + apply (intro AE_I2) + by (metis divide_divide_eq_right log_recip mult_1 mult_minus_right) finally show ?nonneg by simp qed @@ -1632,9 +1654,6 @@ apply (subst conditional_entropy_eq[OF Py Pxy]) apply (auto intro!: sum.cong simp: Pxxy_eq sum_negf[symmetric] eq sum.reindex[OF inj] log_simps zero_less_mult_iff zero_le_mult_iff field_simps mult_less_0_iff AE_count_space) - using Py[THEN simple_distributed] Pxy[THEN simple_distributed] - apply (auto simp add: not_le AE_count_space less_le antisym - simple_distributed_nonneg[OF Py] simple_distributed_nonneg[OF Pxy]) done qed