--- a/src/HOL/Probability/Information.thy Fri Jul 05 21:59:43 2024 +0100
+++ b/src/HOL/Probability/Information.thy Sat Jul 06 12:51:31 2024 +0100
@@ -10,47 +10,22 @@
Independent_Family
begin
+
+lemma log_mult_nz:
+ "x\<noteq>0 \<Longrightarrow> y\<noteq>0 \<Longrightarrow> log a (x * y) = log a x + log a y"
+ by (simp add: log_def ln_mult divide_inverse distrib_right)
+
+lemma log_divide_nz:
+ "x\<noteq>0 \<Longrightarrow> y\<noteq>0 \<Longrightarrow> log a (x / y) = log a x - log a y"
+ by (simp add: diff_divide_distrib ln_div log_def)
+
subsection "Information theory"
locale information_space = prob_space +
fixes b :: real assumes b_gt_1: "1 < b"
-context information_space
-begin
-
text \<open>Introduce some simplification rules for logarithm of base \<^term>\<open>b\<close>.\<close>
-
-lemma log_neg_const:
- assumes "x \<le> 0"
- shows "log b x = log b 0"
-proof -
- { fix u :: real
- have "x \<le> 0" by fact
- also have "0 < exp u"
- using exp_gt_zero .
- finally have "exp u \<noteq> x"
- by auto }
- then show "log b x = log b 0"
- by (simp add: log_def ln_real_def)
-qed
-
-lemma log_mult_eq:
- "log b (A * B) = (if 0 < A * B then log b \<bar>A\<bar> + log b \<bar>B\<bar> else log b 0)"
- using log_mult[of "\<bar>A\<bar>" "\<bar>B\<bar>"] b_gt_1 log_neg_const[of "A * B"]
- by (auto simp: zero_less_mult_iff mult_le_0_iff)
-
-lemma log_inverse_eq:
- "log b (inverse B) = (if 0 < B then - log b B else log b 0)"
- using ln_inverse log_def log_neg_const by force
-
-lemma log_divide_eq:
- "log b (A / B) = (if 0 < A * B then (log b \<bar>A\<bar>) - log b \<bar>B\<bar> else log b 0)"
- unfolding divide_inverse log_mult_eq log_inverse_eq abs_inverse
- by (auto simp: zero_less_mult_iff mult_le_0_iff)
-
-lemmas log_simps = log_mult_eq log_inverse_eq log_divide_eq
-
-end
+lemmas log_simps = log_mult log_inverse log_divide
subsection "Kullback$-$Leibler divergence"
@@ -575,10 +550,16 @@
by (auto intro: distributed_real_measurable measurable_fst'' measurable_snd''
intro!: borel_measurable_times borel_measurable_log borel_measurable_divide)
ultimately have int: "integrable (S \<Otimes>\<^sub>M T) f"
- apply (rule integrable_cong_AE_imp)
- using A B
- by (auto simp: f_def log_divide_eq log_mult_eq field_simps space_pair_measure Px_nn Py_nn Pxy_nn
- less_le)
+ proof (rule integrable_cong_AE_imp)
+ show "AE x in S \<Otimes>\<^sub>M
+ T. Pxy x * log b (Pxy x) - Pxy x * log b (Px (fst x)) - Pxy x * log b (Py (snd x))
+ = f x"
+ using A B unfolding f_def
+
+ apply (auto simp: f_def field_simps space_pair_measure Px_nn Py_nn Pxy_nn
+ less_le )
+ by (smt (verit, del_insts) AE_I2 distrib_left log_divide log_mult mult_eq_0_iff)
+ qed
show "0 \<le> ?M" unfolding M
proof (intro ST.KL_density_density_nonneg)
@@ -828,7 +809,7 @@
have "integrable MX (\<lambda>x. Px x * log b (1 / Px x)) =
integrable MX (\<lambda>x. - Px x * log b (Px x))"
using Px
- by (intro integrable_cong_AE) (auto simp: log_divide_eq less_le)
+ by (intro integrable_cong_AE) (auto simp: log_divide_pos log_recip)
then show "integrable (distr M MX X) (\<lambda>x. - log b (1 / Px x))"
unfolding distributed_distr_eq_density[OF X]
using Px int
@@ -836,7 +817,7 @@
qed (auto simp: minus_log_convex[OF b_gt_1])
also have "\<dots> = (\<integral> x. log b (Px x) \<partial>distr M MX X)"
unfolding distributed_distr_eq_density[OF X] using Px
- by (intro integral_cong_AE) (auto simp: AE_density log_divide_eq)
+ by (intro integral_cong_AE) (auto simp: AE_density log_divide_pos)
also have "\<dots> = - entropy b MX X"
unfolding distributed_distr_eq_density[OF X] using Px
by (subst entropy_distr[OF X]) (auto simp: integral_density)
@@ -872,7 +853,7 @@
have eq: "(\<integral> x. indicator A x / measure MX A * log b (indicator A x / measure MX A) \<partial>MX) =
(\<integral> x. (- log b (measure MX A) / measure MX A) * indicator A x \<partial>MX)"
using uniform_distributed_params[OF X]
- by (intro Bochner_Integration.integral_cong) (auto split: split_indicator simp: log_divide_eq zero_less_measure_iff)
+ by (intro Bochner_Integration.integral_cong) (auto split: split_indicator simp: log_divide_pos zero_less_measure_iff)
show "- (\<integral> x. indicator A x / measure MX A * log b (indicator A x / measure MX A) \<partial>MX) =
log b (measure MX A)"
unfolding eq using uniform_distributed_params[OF X]
@@ -1116,9 +1097,9 @@
have "integrable (S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P) (\<lambda>x. Pxyz x * - log b (?f x))"
apply (rule integrable_cong_AE[THEN iffD1, OF _ _ _ I3])
using ae1 ae2 ae3 ae4
- apply (auto simp: log_divide_eq log_mult_eq zero_le_mult_iff zero_less_mult_iff zero_less_divide_iff field_simps
- less_le space_pair_measure)
- done
+ apply (auto simp: log_divide log_mult zero_le_mult_iff zero_less_mult_iff zero_less_divide_iff field_simps
+ less_le space_pair_measure split: prod.split)
+ by (smt (verit, best) AE_I2 fst_conv log_divide mult_cancel_left mult_minus_right snd_conv)
then
show "integrable ?P (\<lambda>x. - log b (?f x))"
by (subst integrable_real_density) (auto simp: space_pair_measure)
@@ -1131,9 +1112,9 @@
apply simp
apply (intro integral_cong_AE)
using ae1 ae2 ae3 ae4
- apply (auto simp: log_divide_eq zero_less_mult_iff zero_less_divide_iff field_simps
- space_pair_measure less_le)
- done
+ apply (auto simp: zero_less_mult_iff zero_less_divide_iff field_simps
+ space_pair_measure less_le split: prod.split)
+ by (smt (verit, best) AE_I2 fst_conv log_divide mult_eq_0_iff mult_minus_right snd_conv)
finally show ?nonneg
by simp
qed
@@ -1233,8 +1214,10 @@
ultimately have I1: "integrable (S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P) (\<lambda>(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Px x * Pyz (y, z))))"
apply (rule integrable_cong_AE_imp)
using ae1 ae4 Px_nn Pyz_nn Pxyz_nn
- by (auto simp: log_divide_eq log_mult_eq field_simps zero_less_mult_iff space_pair_measure less_le)
-
+ apply (auto simp: log_divide log_mult field_simps zero_less_mult_iff space_pair_measure less_le
+ split: prod.split)
+ apply (intro AE_I2)
+ by (smt (verit, best) distrib_left divide_eq_0_iff fst_conv log_mult mult.commute nonzero_mult_div_cancel_left snd_conv times_divide_eq_right)
have "integrable (S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P)
(\<lambda>x. Pxyz x * log b (Pxz (fst x, snd (snd x))) - Pxyz x * log b (Px (fst x)) - Pxyz x * log b (Pz (snd (snd x))))"
using finite_entropy_integrable_transform[OF Fxz Pxyz Pxyz_nn, of "\<lambda>x. (fst x, snd (snd x))"]
@@ -1247,7 +1230,41 @@
ultimately have I2: "integrable (S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P) (\<lambda>(x, y, z). Pxyz (x, y, z) * log b (Pxz (x, z) / (Px x * Pz z)))"
apply (rule integrable_cong_AE_imp)
using ae1 ae2 ae3 ae4 Px_nn Pz_nn Pxz_nn Pyz_nn Pxyz_nn
- by (auto simp: log_divide_eq log_mult_eq field_simps zero_less_mult_iff less_le space_pair_measure)
+ apply(auto simp: field_simps zero_less_mult_iff less_le space_pair_measure split: prod.split)
+ apply (intro AE_I2)
+ apply clarify
+ proof -
+ fix a :: 'c and aa :: 'd and ba :: 'e and x1 :: 'c and ab :: 'd and baa :: 'e
+ assume a1: "Pxz (fst (x1, ab, baa), snd (snd (x1, ab, baa))) = 0 \<longrightarrow> Pxyz (x1, ab, baa) = 0"
+ assume a2: "Pz (snd (snd (x1, ab, baa))) = 0 \<longrightarrow> Pxyz (x1, ab, baa) = 0"
+ assume a3: "Px (fst (x1, ab, baa)) = 0 \<longrightarrow> Pxyz (x1, ab, baa) = 0"
+ have f4: "\<forall>r ra rb. (ra::real) * (rb + r) = rb * ra + ra * r"
+ by (simp add: vector_space_over_itself.scale_right_distrib)
+ have f5: "0 \<noteq> Px x1 \<or> 0 = Pxyz (x1, ab, baa)"
+ using a3 by auto
+ have f6: "0 \<noteq> Pz baa \<or> 0 = Pxyz (x1, ab, baa)"
+ using a2 by force
+ have f7: "\<forall>r. (0::real) = 0 * r"
+ by simp
+ have "0 = Px x1 * Pz baa \<longrightarrow> Pxyz (x1, ab, baa) * log b (Pxz (x1, baa)) = Pxyz (x1, ab, baa) * (log b (Px x1) + log b (Pz baa) + log b (Pxz (x1, baa) / (Px x1 * Pz baa)))"
+ using f6 f5 by simp
+ moreover
+ { assume "0 \<noteq> Px x1 * Pz baa"
+ moreover
+ { assume "0 \<noteq> Px x1 * Pz baa \<and> 0 \<noteq> Pxyz (x1, ab, baa)"
+ then have "Pxyz (x1, ab, baa) * log b (Pxz (x1, baa)) = Pxyz (x1, ab, baa) * (log b (Px x1 * Pz baa) + log b (Pxz (x1, baa) / (Px x1 * Pz baa))) \<and> 0 \<noteq> Pxz (x1, baa) / (Px x1 * Pz baa)"
+ using a1 by (metis (no_types) divide_eq_0_iff fst_eqD log_mult_nz mult.commute nonzero_eq_divide_eq snd_eqD)
+ moreover
+ { assume "Pxyz (x1, ab, baa) * log b (Pxz (x1, baa)) = Pxyz (x1, ab, baa) * (log b (Px x1) + (log b 0 + log b 0)) \<and> 0 \<noteq> Pxz (x1, baa) / (Px x1 * Pz baa) \<and> Pxyz (x1, ab, baa) * log b (Pxz (x1, baa)) = log b (Px x1) * Pxyz (x1, ab, baa) + Pxyz (x1, ab, baa) * log b (Pz baa * (Pxz (x1, baa) / (Px x1 * Pz baa)))"
+ then have "Pxyz (x1, ab, baa) * log b (Pxz (x1, baa)) = log b (Px x1) * Pxyz (x1, ab, baa) + (log b (Pz baa) * Pxyz (x1, ab, baa) + Pxyz (x1, ab, baa) * log b (Pxz (x1, baa) / (Px x1 * Pz baa)))"
+ using f7 f4 by (metis (no_types) divide_eq_0_iff log_mult_nz mult.commute) }
+ ultimately have "Pxyz (x1, ab, baa) * log b (Pxz (x1, baa)) = Pxyz (x1, ab, baa) * (log b (Px x1) + log b (Pz baa) + log b (Pxz (x1, baa) / (Px x1 * Pz baa))) \<or> Pxyz (x1, ab, baa) * log b (Pxz (x1, baa)) = log b (Px x1) * Pxyz (x1, ab, baa) + (log b (Pz baa) * Pxyz (x1, ab, baa) + Pxyz (x1, ab, baa) * log b (Pxz (x1, baa) / (Px x1 * Pz baa)))"
+ using f4 by (simp add: log_mult_nz) }
+ ultimately have "Pxyz (x1, ab, baa) * log b (Pxz (x1, baa)) = Pxyz (x1, ab, baa) * (log b (Px x1) + log b (Pz baa) + log b (Pxz (x1, baa) / (Px x1 * Pz baa))) \<or> Pxyz (x1, ab, baa) * log b (Pxz (x1, baa)) = Pxyz (x1, ab, baa) * (log b (Px x1) + log b (Pz baa)) + Pxyz (x1, ab, baa) * log b (Pxz (x1, baa) / (Px x1 * Pz baa)) \<or> Pxyz (x1, ab, baa) * log b (Pxz (x1, baa)) = log b (Px x1) * Pxyz (x1, ab, baa) + (log b (Pz baa) * Pxyz (x1, ab, baa) + Pxyz (x1, ab, baa) * log b (Pxz (x1, baa) / (Px x1 * Pz baa)))"
+ using f7 by (metis (no_types)) }
+ ultimately show "Pxyz (x1, ab, baa) * log b (Pxz (x1, baa)) = log b (Px x1) * Pxyz (x1, ab, baa) + (log b (Pz baa) * Pxyz (x1, ab, baa) + Pxyz (x1, ab, baa) * log b (Pxz (x1, baa) / (Px x1 * Pz baa)))"
+ by argo
+ qed
from ae I1 I2 show ?eq
unfolding conditional_mutual_information_def mi_eq
@@ -1355,8 +1372,12 @@
have "integrable (S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P) (\<lambda>x. Pxyz x * - log b (?f x))"
apply (rule integrable_cong_AE[THEN iffD1, OF _ _ _ I3])
using Pz_nn Pxz_nn Pyz_nn Pxyz_nn ae2 ae3 ae4
- by (auto simp: log_divide_eq log_mult_eq zero_le_mult_iff zero_less_mult_iff
- zero_less_divide_iff field_simps space_pair_measure less_le)
+ apply (auto simp: log_divide_nz log_mult_nz zero_le_mult_iff zero_less_mult_iff
+ zero_less_divide_iff field_simps space_pair_measure less_le split: prod.split)
+ apply (intro AE_I2)
+ apply (auto simp: )
+ by (smt (verit, best) log_divide minus_mult_minus mult_minus_left no_zero_divisors)
+
then
show "integrable ?P (\<lambda>x. - log b (?f x))"
using Pxyz_nn by (auto simp: integrable_real_density)
@@ -1370,9 +1391,10 @@
apply simp
apply (intro integral_cong_AE)
using ae1 ae2 ae3 ae4
- apply (auto simp: log_divide_eq zero_less_mult_iff zero_less_divide_iff
- field_simps space_pair_measure less_le integral_cong_AE)
- done
+ apply (auto simp: log_divide_pos zero_less_mult_iff zero_less_divide_iff
+ field_simps space_pair_measure less_le integral_cong_AE split: prod.split)
+ apply (intro AE_I2)
+ by (metis divide_divide_eq_right log_recip mult_1 mult_minus_right)
finally show ?nonneg
by simp
qed
@@ -1632,9 +1654,6 @@
apply (subst conditional_entropy_eq[OF Py Pxy])
apply (auto intro!: sum.cong simp: Pxxy_eq sum_negf[symmetric] eq sum.reindex[OF inj]
log_simps zero_less_mult_iff zero_le_mult_iff field_simps mult_less_0_iff AE_count_space)
- using Py[THEN simple_distributed] Pxy[THEN simple_distributed]
- apply (auto simp add: not_le AE_count_space less_le antisym
- simple_distributed_nonneg[OF Py] simple_distributed_nonneg[OF Pxy])
done
qed