src/HOL/Probability/Information.thy
changeset 80521 5c691b178e08
parent 80034 95b4fb2b5359
child 80528 6dec6b1f31f5
--- a/src/HOL/Probability/Information.thy	Fri Jul 05 21:59:43 2024 +0100
+++ b/src/HOL/Probability/Information.thy	Sat Jul 06 12:51:31 2024 +0100
@@ -10,47 +10,22 @@
   Independent_Family
 begin
 
+
+lemma log_mult_nz:
+  "x\<noteq>0 \<Longrightarrow> y\<noteq>0 \<Longrightarrow> log a (x * y) = log a x + log a y"
+  by (simp add: log_def ln_mult divide_inverse distrib_right)
+
+lemma log_divide_nz:
+  "x\<noteq>0 \<Longrightarrow> y\<noteq>0 \<Longrightarrow> log a (x / y) = log a x - log a y"
+  by (simp add: diff_divide_distrib ln_div log_def)
+
 subsection "Information theory"
 
 locale information_space = prob_space +
   fixes b :: real assumes b_gt_1: "1 < b"
 
-context information_space
-begin
-
 text \<open>Introduce some simplification rules for logarithm of base \<^term>\<open>b\<close>.\<close>
-
-lemma log_neg_const:
-  assumes "x \<le> 0"
-  shows "log b x = log b 0"
-proof -
-  { fix u :: real
-    have "x \<le> 0" by fact
-    also have "0 < exp u"
-      using exp_gt_zero .
-    finally have "exp u \<noteq> x"
-      by auto }
-  then show "log b x = log b 0"
-    by (simp add: log_def ln_real_def)
-qed
-
-lemma log_mult_eq:
-  "log b (A * B) = (if 0 < A * B then log b \<bar>A\<bar> + log b \<bar>B\<bar> else log b 0)"
-  using log_mult[of "\<bar>A\<bar>" "\<bar>B\<bar>"] b_gt_1 log_neg_const[of "A * B"]
-  by (auto simp: zero_less_mult_iff mult_le_0_iff)
-
-lemma log_inverse_eq:
-  "log b (inverse B) = (if 0 < B then - log b B else log b 0)"
-  using ln_inverse log_def log_neg_const by force
-
-lemma log_divide_eq:
-  "log b (A / B) = (if 0 < A * B then (log b \<bar>A\<bar>) - log b \<bar>B\<bar> else log b 0)"
-  unfolding divide_inverse log_mult_eq log_inverse_eq abs_inverse
-  by (auto simp: zero_less_mult_iff mult_le_0_iff)
-
-lemmas log_simps = log_mult_eq log_inverse_eq log_divide_eq
-
-end
+lemmas log_simps = log_mult log_inverse log_divide
 
 subsection "Kullback$-$Leibler divergence"
 
@@ -575,10 +550,16 @@
     by (auto intro: distributed_real_measurable measurable_fst'' measurable_snd''
       intro!: borel_measurable_times borel_measurable_log borel_measurable_divide)
   ultimately have int: "integrable (S \<Otimes>\<^sub>M T) f"
-    apply (rule integrable_cong_AE_imp)
-    using A B
-    by  (auto simp: f_def log_divide_eq log_mult_eq field_simps space_pair_measure Px_nn Py_nn Pxy_nn
-                  less_le)
+  proof (rule integrable_cong_AE_imp)
+    show "AE x in S \<Otimes>\<^sub>M
+            T. Pxy x * log b (Pxy x) - Pxy x * log b (Px (fst x)) - Pxy x * log b (Py (snd x)) 
+          = f x"
+      using A B unfolding f_def
+
+      apply  (auto simp: f_def field_simps space_pair_measure Px_nn Py_nn Pxy_nn
+          less_le )
+      by (smt (verit, del_insts) AE_I2 distrib_left log_divide log_mult mult_eq_0_iff)
+  qed
 
   show "0 \<le> ?M" unfolding M
   proof (intro ST.KL_density_density_nonneg)
@@ -828,7 +809,7 @@
     have "integrable MX (\<lambda>x. Px x * log b (1 / Px x)) =
       integrable MX (\<lambda>x. - Px x * log b (Px x))"
       using Px
-      by (intro integrable_cong_AE) (auto simp: log_divide_eq less_le)
+      by (intro integrable_cong_AE) (auto simp: log_divide_pos log_recip)
     then show "integrable (distr M MX X) (\<lambda>x. - log b (1 / Px x))"
       unfolding distributed_distr_eq_density[OF X]
       using Px int
@@ -836,7 +817,7 @@
   qed (auto simp: minus_log_convex[OF b_gt_1])
   also have "\<dots> = (\<integral> x. log b (Px x) \<partial>distr M MX X)"
     unfolding distributed_distr_eq_density[OF X] using Px
-    by (intro integral_cong_AE) (auto simp: AE_density log_divide_eq)
+    by (intro integral_cong_AE) (auto simp: AE_density log_divide_pos)
   also have "\<dots> = - entropy b MX X"
     unfolding distributed_distr_eq_density[OF X] using Px
     by (subst entropy_distr[OF X]) (auto simp: integral_density)
@@ -872,7 +853,7 @@
   have eq: "(\<integral> x. indicator A x / measure MX A * log b (indicator A x / measure MX A) \<partial>MX) =
     (\<integral> x. (- log b (measure MX A) / measure MX A) * indicator A x \<partial>MX)"
     using uniform_distributed_params[OF X]
-    by (intro Bochner_Integration.integral_cong) (auto split: split_indicator simp: log_divide_eq zero_less_measure_iff)
+    by (intro Bochner_Integration.integral_cong) (auto split: split_indicator simp: log_divide_pos zero_less_measure_iff)
   show "- (\<integral> x. indicator A x / measure MX A * log b (indicator A x / measure MX A) \<partial>MX) =
     log b (measure MX A)"
     unfolding eq using uniform_distributed_params[OF X]
@@ -1116,9 +1097,9 @@
     have "integrable (S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P) (\<lambda>x. Pxyz x * - log b (?f x))"
       apply (rule integrable_cong_AE[THEN iffD1, OF _ _ _ I3])
       using ae1 ae2 ae3 ae4 
-      apply (auto simp: log_divide_eq log_mult_eq zero_le_mult_iff zero_less_mult_iff zero_less_divide_iff field_simps
-          less_le space_pair_measure)
-      done
+      apply (auto simp: log_divide log_mult zero_le_mult_iff zero_less_mult_iff zero_less_divide_iff field_simps
+          less_le space_pair_measure split: prod.split)
+      by (smt (verit, best) AE_I2 fst_conv log_divide mult_cancel_left mult_minus_right snd_conv)
     then
     show "integrable ?P (\<lambda>x. - log b (?f x))"
       by (subst integrable_real_density) (auto simp: space_pair_measure) 
@@ -1131,9 +1112,9 @@
      apply simp
     apply (intro integral_cong_AE)
     using ae1 ae2 ae3 ae4
-      apply (auto simp: log_divide_eq zero_less_mult_iff zero_less_divide_iff field_simps
-        space_pair_measure less_le)
-    done
+      apply (auto simp: zero_less_mult_iff zero_less_divide_iff field_simps
+        space_pair_measure less_le split: prod.split)
+      by (smt (verit, best) AE_I2 fst_conv log_divide mult_eq_0_iff mult_minus_right snd_conv)
   finally show ?nonneg
     by simp
 qed
@@ -1233,8 +1214,10 @@
   ultimately have I1: "integrable (S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P) (\<lambda>(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Px x * Pyz (y, z))))"
     apply (rule integrable_cong_AE_imp)
     using ae1 ae4 Px_nn Pyz_nn Pxyz_nn 
-    by (auto simp: log_divide_eq log_mult_eq field_simps zero_less_mult_iff space_pair_measure less_le)
-
+    apply (auto simp: log_divide log_mult field_simps zero_less_mult_iff space_pair_measure less_le
+         split: prod.split)
+    apply (intro AE_I2)
+    by (smt (verit, best) distrib_left divide_eq_0_iff fst_conv log_mult mult.commute nonzero_mult_div_cancel_left snd_conv times_divide_eq_right)
   have "integrable (S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P)
     (\<lambda>x. Pxyz x * log b (Pxz (fst x, snd (snd x))) - Pxyz x * log b (Px (fst x)) - Pxyz x * log b (Pz (snd (snd x))))"
     using finite_entropy_integrable_transform[OF Fxz Pxyz Pxyz_nn, of "\<lambda>x. (fst x, snd (snd x))"]
@@ -1247,7 +1230,41 @@
   ultimately have I2: "integrable (S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P) (\<lambda>(x, y, z). Pxyz (x, y, z) * log b (Pxz (x, z) / (Px x * Pz z)))"
     apply (rule integrable_cong_AE_imp)
     using ae1 ae2 ae3 ae4  Px_nn Pz_nn Pxz_nn Pyz_nn Pxyz_nn
-    by (auto simp: log_divide_eq log_mult_eq field_simps zero_less_mult_iff less_le space_pair_measure)
+    apply(auto simp:   field_simps zero_less_mult_iff less_le space_pair_measure split: prod.split)
+    apply (intro AE_I2)
+    apply clarify
+  proof -
+    fix a :: 'c and aa :: 'd and ba :: 'e and x1 :: 'c and ab :: 'd and baa :: 'e
+    assume a1: "Pxz (fst (x1, ab, baa), snd (snd (x1, ab, baa))) = 0 \<longrightarrow> Pxyz (x1, ab, baa) = 0"
+    assume a2: "Pz (snd (snd (x1, ab, baa))) = 0 \<longrightarrow> Pxyz (x1, ab, baa) = 0"
+    assume a3: "Px (fst (x1, ab, baa)) = 0 \<longrightarrow> Pxyz (x1, ab, baa) = 0"
+    have f4: "\<forall>r ra rb. (ra::real) * (rb + r) = rb * ra + ra * r"
+      by (simp add: vector_space_over_itself.scale_right_distrib)
+    have f5: "0 \<noteq> Px x1 \<or> 0 = Pxyz (x1, ab, baa)"
+      using a3 by auto
+    have f6: "0 \<noteq> Pz baa \<or> 0 = Pxyz (x1, ab, baa)"
+      using a2 by force
+    have f7: "\<forall>r. (0::real) = 0 * r"
+      by simp
+    have "0 = Px x1 * Pz baa \<longrightarrow> Pxyz (x1, ab, baa) * log b (Pxz (x1, baa)) = Pxyz (x1, ab, baa) * (log b (Px x1) + log b (Pz baa) + log b (Pxz (x1, baa) / (Px x1 * Pz baa)))"
+      using f6 f5 by simp
+    moreover
+    { assume "0 \<noteq> Px x1 * Pz baa"
+      moreover
+      { assume "0 \<noteq> Px x1 * Pz baa \<and> 0 \<noteq> Pxyz (x1, ab, baa)"
+        then have "Pxyz (x1, ab, baa) * log b (Pxz (x1, baa)) = Pxyz (x1, ab, baa) * (log b (Px x1 * Pz baa) + log b (Pxz (x1, baa) / (Px x1 * Pz baa))) \<and> 0 \<noteq> Pxz (x1, baa) / (Px x1 * Pz baa)"
+          using a1 by (metis (no_types) divide_eq_0_iff fst_eqD log_mult_nz mult.commute nonzero_eq_divide_eq snd_eqD)
+        moreover
+        { assume "Pxyz (x1, ab, baa) * log b (Pxz (x1, baa)) = Pxyz (x1, ab, baa) * (log b (Px x1) + (log b 0 + log b 0)) \<and> 0 \<noteq> Pxz (x1, baa) / (Px x1 * Pz baa) \<and> Pxyz (x1, ab, baa) * log b (Pxz (x1, baa)) = log b (Px x1) * Pxyz (x1, ab, baa) + Pxyz (x1, ab, baa) * log b (Pz baa * (Pxz (x1, baa) / (Px x1 * Pz baa)))"
+          then have "Pxyz (x1, ab, baa) * log b (Pxz (x1, baa)) = log b (Px x1) * Pxyz (x1, ab, baa) + (log b (Pz baa) * Pxyz (x1, ab, baa) + Pxyz (x1, ab, baa) * log b (Pxz (x1, baa) / (Px x1 * Pz baa)))"
+            using f7 f4 by (metis (no_types) divide_eq_0_iff log_mult_nz mult.commute) }
+        ultimately have "Pxyz (x1, ab, baa) * log b (Pxz (x1, baa)) = Pxyz (x1, ab, baa) * (log b (Px x1) + log b (Pz baa) + log b (Pxz (x1, baa) / (Px x1 * Pz baa))) \<or> Pxyz (x1, ab, baa) * log b (Pxz (x1, baa)) = log b (Px x1) * Pxyz (x1, ab, baa) + (log b (Pz baa) * Pxyz (x1, ab, baa) + Pxyz (x1, ab, baa) * log b (Pxz (x1, baa) / (Px x1 * Pz baa)))"
+          using f4 by (simp add: log_mult_nz) }
+      ultimately have "Pxyz (x1, ab, baa) * log b (Pxz (x1, baa)) = Pxyz (x1, ab, baa) * (log b (Px x1) + log b (Pz baa) + log b (Pxz (x1, baa) / (Px x1 * Pz baa))) \<or> Pxyz (x1, ab, baa) * log b (Pxz (x1, baa)) = Pxyz (x1, ab, baa) * (log b (Px x1) + log b (Pz baa)) + Pxyz (x1, ab, baa) * log b (Pxz (x1, baa) / (Px x1 * Pz baa)) \<or> Pxyz (x1, ab, baa) * log b (Pxz (x1, baa)) = log b (Px x1) * Pxyz (x1, ab, baa) + (log b (Pz baa) * Pxyz (x1, ab, baa) + Pxyz (x1, ab, baa) * log b (Pxz (x1, baa) / (Px x1 * Pz baa)))"
+        using f7 by (metis (no_types)) }
+    ultimately show "Pxyz (x1, ab, baa) * log b (Pxz (x1, baa)) = log b (Px x1) * Pxyz (x1, ab, baa) + (log b (Pz baa) * Pxyz (x1, ab, baa) + Pxyz (x1, ab, baa) * log b (Pxz (x1, baa) / (Px x1 * Pz baa)))"
+      by argo
+  qed
 
   from ae I1 I2 show ?eq
     unfolding conditional_mutual_information_def mi_eq
@@ -1355,8 +1372,12 @@
     have "integrable (S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P) (\<lambda>x. Pxyz x * - log b (?f x))"
       apply (rule integrable_cong_AE[THEN iffD1, OF _ _ _ I3])
       using Pz_nn Pxz_nn Pyz_nn Pxyz_nn ae2 ae3 ae4 
-      by (auto simp: log_divide_eq log_mult_eq zero_le_mult_iff zero_less_mult_iff
-          zero_less_divide_iff field_simps space_pair_measure less_le)
+      apply (auto simp: log_divide_nz log_mult_nz zero_le_mult_iff zero_less_mult_iff
+          zero_less_divide_iff field_simps space_pair_measure less_le split: prod.split)
+    apply (intro AE_I2)
+      apply (auto simp: )
+      by (smt (verit, best) log_divide minus_mult_minus mult_minus_left no_zero_divisors)
+
     then
     show "integrable ?P (\<lambda>x. - log b (?f x))"
       using Pxyz_nn by (auto simp: integrable_real_density)
@@ -1370,9 +1391,10 @@
     apply simp
     apply (intro integral_cong_AE)
     using ae1 ae2 ae3 ae4 
-    apply (auto simp: log_divide_eq zero_less_mult_iff zero_less_divide_iff
-                      field_simps space_pair_measure less_le integral_cong_AE)
-    done
+    apply (auto simp: log_divide_pos zero_less_mult_iff zero_less_divide_iff
+                      field_simps space_pair_measure less_le integral_cong_AE split: prod.split)
+    apply (intro AE_I2)
+    by (metis divide_divide_eq_right log_recip mult_1 mult_minus_right)
   finally show ?nonneg
     by simp
 qed
@@ -1632,9 +1654,6 @@
     apply (subst conditional_entropy_eq[OF Py Pxy])
     apply (auto intro!: sum.cong simp: Pxxy_eq sum_negf[symmetric] eq sum.reindex[OF inj]
                 log_simps zero_less_mult_iff zero_le_mult_iff field_simps mult_less_0_iff AE_count_space)
-    using Py[THEN simple_distributed] Pxy[THEN simple_distributed]
-    apply (auto simp add: not_le AE_count_space less_le antisym
-      simple_distributed_nonneg[OF Py] simple_distributed_nonneg[OF Pxy])
     done
 qed