src/HOL/Probability/Information.thy
changeset 80528 6dec6b1f31f5
parent 80521 5c691b178e08
child 80539 34a5ca6fcddd
equal deleted inserted replaced
80523:532156e8f15f 80528:6dec6b1f31f5
     7 
     7 
     8 theory Information
     8 theory Information
     9 imports
     9 imports
    10   Independent_Family
    10   Independent_Family
    11 begin
    11 begin
    12 
       
    13 
       
    14 lemma log_mult_nz:
       
    15   "x\<noteq>0 \<Longrightarrow> y\<noteq>0 \<Longrightarrow> log a (x * y) = log a x + log a y"
       
    16   by (simp add: log_def ln_mult divide_inverse distrib_right)
       
    17 
       
    18 lemma log_divide_nz:
       
    19   "x\<noteq>0 \<Longrightarrow> y\<noteq>0 \<Longrightarrow> log a (x / y) = log a x - log a y"
       
    20   by (simp add: diff_divide_distrib ln_div log_def)
       
    21 
    12 
    22 subsection "Information theory"
    13 subsection "Information theory"
    23 
    14 
    24 locale information_space = prob_space +
    15 locale information_space = prob_space +
    25   fixes b :: real assumes b_gt_1: "1 < b"
    16   fixes b :: real assumes b_gt_1: "1 < b"
   552   ultimately have int: "integrable (S \<Otimes>\<^sub>M T) f"
   543   ultimately have int: "integrable (S \<Otimes>\<^sub>M T) f"
   553   proof (rule integrable_cong_AE_imp)
   544   proof (rule integrable_cong_AE_imp)
   554     show "AE x in S \<Otimes>\<^sub>M
   545     show "AE x in S \<Otimes>\<^sub>M
   555             T. Pxy x * log b (Pxy x) - Pxy x * log b (Px (fst x)) - Pxy x * log b (Py (snd x)) 
   546             T. Pxy x * log b (Pxy x) - Pxy x * log b (Px (fst x)) - Pxy x * log b (Py (snd x)) 
   556           = f x"
   547           = f x"
   557       using A B unfolding f_def
   548       using A B 
   558 
   549       by (auto simp: f_def field_simps space_pair_measure log_mult log_divide)
   559       apply  (auto simp: f_def field_simps space_pair_measure Px_nn Py_nn Pxy_nn
       
   560           less_le )
       
   561       by (smt (verit, del_insts) AE_I2 distrib_left log_divide log_mult mult_eq_0_iff)
       
   562   qed
   550   qed
   563 
   551 
   564   show "0 \<le> ?M" unfolding M
   552   show "0 \<le> ?M" unfolding M
   565   proof (intro ST.KL_density_density_nonneg)
   553   proof (intro ST.KL_density_density_nonneg)
   566     show "prob_space (density (S \<Otimes>\<^sub>M T) (\<lambda>x. ennreal (Pxy x))) "
   554     show "prob_space (density (S \<Otimes>\<^sub>M T) (\<lambda>x. ennreal (Pxy x))) "
  1095       unfolding real_integrable_def
  1083       unfolding real_integrable_def
  1096       using fin neg by (auto simp: split_beta')
  1084       using fin neg by (auto simp: split_beta')
  1097     have "integrable (S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P) (\<lambda>x. Pxyz x * - log b (?f x))"
  1085     have "integrable (S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P) (\<lambda>x. Pxyz x * - log b (?f x))"
  1098       apply (rule integrable_cong_AE[THEN iffD1, OF _ _ _ I3])
  1086       apply (rule integrable_cong_AE[THEN iffD1, OF _ _ _ I3])
  1099       using ae1 ae2 ae3 ae4 
  1087       using ae1 ae2 ae3 ae4 
  1100       apply (auto simp: log_divide log_mult zero_le_mult_iff zero_less_mult_iff zero_less_divide_iff field_simps
  1088       by (auto simp: log_mult log_divide field_simps)
  1101           less_le space_pair_measure split: prod.split)
       
  1102       by (smt (verit, best) AE_I2 fst_conv log_divide mult_cancel_left mult_minus_right snd_conv)
       
  1103     then
  1089     then
  1104     show "integrable ?P (\<lambda>x. - log b (?f x))"
  1090     show "integrable ?P (\<lambda>x. - log b (?f x))"
  1105       by (subst integrable_real_density) (auto simp: space_pair_measure) 
  1091       by (subst integrable_real_density) (auto simp: space_pair_measure) 
  1106   qed (auto simp: b_gt_1 minus_log_convex)
  1092   qed (auto simp: b_gt_1 minus_log_convex)
  1107   also have "\<dots> = conditional_mutual_information b S T P X Y Z"
  1093   also have "\<dots> = conditional_mutual_information b S T P X Y Z"
  1110        apply simp
  1096        apply simp
  1111       apply (force simp: space_pair_measure) 
  1097       apply (force simp: space_pair_measure) 
  1112      apply simp
  1098      apply simp
  1113     apply (intro integral_cong_AE)
  1099     apply (intro integral_cong_AE)
  1114     using ae1 ae2 ae3 ae4
  1100     using ae1 ae2 ae3 ae4
  1115       apply (auto simp: zero_less_mult_iff zero_less_divide_iff field_simps
  1101     by (auto simp: field_simps log_divide)
  1116         space_pair_measure less_le split: prod.split)
       
  1117       by (smt (verit, best) AE_I2 fst_conv log_divide mult_eq_0_iff mult_minus_right snd_conv)
       
  1118   finally show ?nonneg
  1102   finally show ?nonneg
  1119     by simp
  1103     by simp
  1120 qed
  1104 qed
  1121 
  1105 
  1122 lemma (in information_space)
  1106 lemma (in information_space)
  1212   moreover have "(\<lambda>(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Px x * Pyz (y, z)))) \<in> borel_measurable (S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P)"
  1196   moreover have "(\<lambda>(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Px x * Pyz (y, z)))) \<in> borel_measurable (S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P)"
  1213     using Pxyz Px Pyz by simp
  1197     using Pxyz Px Pyz by simp
  1214   ultimately have I1: "integrable (S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P) (\<lambda>(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Px x * Pyz (y, z))))"
  1198   ultimately have I1: "integrable (S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P) (\<lambda>(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Px x * Pyz (y, z))))"
  1215     apply (rule integrable_cong_AE_imp)
  1199     apply (rule integrable_cong_AE_imp)
  1216     using ae1 ae4 Px_nn Pyz_nn Pxyz_nn 
  1200     using ae1 ae4 Px_nn Pyz_nn Pxyz_nn 
  1217     apply (auto simp: log_divide log_mult field_simps zero_less_mult_iff space_pair_measure less_le
  1201     by (auto simp: log_divide log_mult field_simps)
  1218          split: prod.split)
       
  1219     apply (intro AE_I2)
       
  1220     by (smt (verit, best) distrib_left divide_eq_0_iff fst_conv log_mult mult.commute nonzero_mult_div_cancel_left snd_conv times_divide_eq_right)
       
  1221   have "integrable (S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P)
  1202   have "integrable (S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P)
  1222     (\<lambda>x. Pxyz x * log b (Pxz (fst x, snd (snd x))) - Pxyz x * log b (Px (fst x)) - Pxyz x * log b (Pz (snd (snd x))))"
  1203     (\<lambda>x. Pxyz x * log b (Pxz (fst x, snd (snd x))) - Pxyz x * log b (Px (fst x)) - Pxyz x * log b (Pz (snd (snd x))))"
  1223     using finite_entropy_integrable_transform[OF Fxz Pxyz Pxyz_nn, of "\<lambda>x. (fst x, snd (snd x))"]
  1204     using finite_entropy_integrable_transform[OF Fxz Pxyz Pxyz_nn, of "\<lambda>x. (fst x, snd (snd x))"]
  1224     using finite_entropy_integrable_transform[OF Fx Pxyz Pxyz_nn, of fst]
  1205     using finite_entropy_integrable_transform[OF Fx Pxyz Pxyz_nn, of fst]
  1225     using finite_entropy_integrable_transform[OF Fz Pxyz Pxyz_nn, of "snd \<circ> snd"]
  1206     using finite_entropy_integrable_transform[OF Fz Pxyz Pxyz_nn, of "snd \<circ> snd"]
  1228     using Pxyz Px Pz
  1209     using Pxyz Px Pz
  1229     by auto
  1210     by auto
  1230   ultimately have I2: "integrable (S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P) (\<lambda>(x, y, z). Pxyz (x, y, z) * log b (Pxz (x, z) / (Px x * Pz z)))"
  1211   ultimately have I2: "integrable (S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P) (\<lambda>(x, y, z). Pxyz (x, y, z) * log b (Pxz (x, z) / (Px x * Pz z)))"
  1231     apply (rule integrable_cong_AE_imp)
  1212     apply (rule integrable_cong_AE_imp)
  1232     using ae1 ae2 ae3 ae4  Px_nn Pz_nn Pxz_nn Pyz_nn Pxyz_nn
  1213     using ae1 ae2 ae3 ae4  Px_nn Pz_nn Pxz_nn Pyz_nn Pxyz_nn
  1233     apply(auto simp:   field_simps zero_less_mult_iff less_le space_pair_measure split: prod.split)
  1214     apply(auto simp:  field_simps log_divide log_mult)
  1234     apply (intro AE_I2)
  1215     done
  1235     apply clarify
       
  1236   proof -
       
  1237     fix a :: 'c and aa :: 'd and ba :: 'e and x1 :: 'c and ab :: 'd and baa :: 'e
       
  1238     assume a1: "Pxz (fst (x1, ab, baa), snd (snd (x1, ab, baa))) = 0 \<longrightarrow> Pxyz (x1, ab, baa) = 0"
       
  1239     assume a2: "Pz (snd (snd (x1, ab, baa))) = 0 \<longrightarrow> Pxyz (x1, ab, baa) = 0"
       
  1240     assume a3: "Px (fst (x1, ab, baa)) = 0 \<longrightarrow> Pxyz (x1, ab, baa) = 0"
       
  1241     have f4: "\<forall>r ra rb. (ra::real) * (rb + r) = rb * ra + ra * r"
       
  1242       by (simp add: vector_space_over_itself.scale_right_distrib)
       
  1243     have f5: "0 \<noteq> Px x1 \<or> 0 = Pxyz (x1, ab, baa)"
       
  1244       using a3 by auto
       
  1245     have f6: "0 \<noteq> Pz baa \<or> 0 = Pxyz (x1, ab, baa)"
       
  1246       using a2 by force
       
  1247     have f7: "\<forall>r. (0::real) = 0 * r"
       
  1248       by simp
       
  1249     have "0 = Px x1 * Pz baa \<longrightarrow> Pxyz (x1, ab, baa) * log b (Pxz (x1, baa)) = Pxyz (x1, ab, baa) * (log b (Px x1) + log b (Pz baa) + log b (Pxz (x1, baa) / (Px x1 * Pz baa)))"
       
  1250       using f6 f5 by simp
       
  1251     moreover
       
  1252     { assume "0 \<noteq> Px x1 * Pz baa"
       
  1253       moreover
       
  1254       { assume "0 \<noteq> Px x1 * Pz baa \<and> 0 \<noteq> Pxyz (x1, ab, baa)"
       
  1255         then have "Pxyz (x1, ab, baa) * log b (Pxz (x1, baa)) = Pxyz (x1, ab, baa) * (log b (Px x1 * Pz baa) + log b (Pxz (x1, baa) / (Px x1 * Pz baa))) \<and> 0 \<noteq> Pxz (x1, baa) / (Px x1 * Pz baa)"
       
  1256           using a1 by (metis (no_types) divide_eq_0_iff fst_eqD log_mult_nz mult.commute nonzero_eq_divide_eq snd_eqD)
       
  1257         moreover
       
  1258         { assume "Pxyz (x1, ab, baa) * log b (Pxz (x1, baa)) = Pxyz (x1, ab, baa) * (log b (Px x1) + (log b 0 + log b 0)) \<and> 0 \<noteq> Pxz (x1, baa) / (Px x1 * Pz baa) \<and> Pxyz (x1, ab, baa) * log b (Pxz (x1, baa)) = log b (Px x1) * Pxyz (x1, ab, baa) + Pxyz (x1, ab, baa) * log b (Pz baa * (Pxz (x1, baa) / (Px x1 * Pz baa)))"
       
  1259           then have "Pxyz (x1, ab, baa) * log b (Pxz (x1, baa)) = log b (Px x1) * Pxyz (x1, ab, baa) + (log b (Pz baa) * Pxyz (x1, ab, baa) + Pxyz (x1, ab, baa) * log b (Pxz (x1, baa) / (Px x1 * Pz baa)))"
       
  1260             using f7 f4 by (metis (no_types) divide_eq_0_iff log_mult_nz mult.commute) }
       
  1261         ultimately have "Pxyz (x1, ab, baa) * log b (Pxz (x1, baa)) = Pxyz (x1, ab, baa) * (log b (Px x1) + log b (Pz baa) + log b (Pxz (x1, baa) / (Px x1 * Pz baa))) \<or> Pxyz (x1, ab, baa) * log b (Pxz (x1, baa)) = log b (Px x1) * Pxyz (x1, ab, baa) + (log b (Pz baa) * Pxyz (x1, ab, baa) + Pxyz (x1, ab, baa) * log b (Pxz (x1, baa) / (Px x1 * Pz baa)))"
       
  1262           using f4 by (simp add: log_mult_nz) }
       
  1263       ultimately have "Pxyz (x1, ab, baa) * log b (Pxz (x1, baa)) = Pxyz (x1, ab, baa) * (log b (Px x1) + log b (Pz baa) + log b (Pxz (x1, baa) / (Px x1 * Pz baa))) \<or> Pxyz (x1, ab, baa) * log b (Pxz (x1, baa)) = Pxyz (x1, ab, baa) * (log b (Px x1) + log b (Pz baa)) + Pxyz (x1, ab, baa) * log b (Pxz (x1, baa) / (Px x1 * Pz baa)) \<or> Pxyz (x1, ab, baa) * log b (Pxz (x1, baa)) = log b (Px x1) * Pxyz (x1, ab, baa) + (log b (Pz baa) * Pxyz (x1, ab, baa) + Pxyz (x1, ab, baa) * log b (Pxz (x1, baa) / (Px x1 * Pz baa)))"
       
  1264         using f7 by (metis (no_types)) }
       
  1265     ultimately show "Pxyz (x1, ab, baa) * log b (Pxz (x1, baa)) = log b (Px x1) * Pxyz (x1, ab, baa) + (log b (Pz baa) * Pxyz (x1, ab, baa) + Pxyz (x1, ab, baa) * log b (Pxz (x1, baa) / (Px x1 * Pz baa)))"
       
  1266       by argo
       
  1267   qed
       
  1268 
       
  1269   from ae I1 I2 show ?eq
  1216   from ae I1 I2 show ?eq
  1270     unfolding conditional_mutual_information_def mi_eq
  1217     unfolding conditional_mutual_information_def mi_eq
  1271     apply (subst mutual_information_distr[OF S TP Px Px_nn Pyz Pyz_nn Pxyz Pxyz_nn]; simp add: space_pair_measure)
  1218     apply (subst mutual_information_distr[OF S TP Px Px_nn Pyz Pyz_nn Pxyz Pxyz_nn]; simp add: space_pair_measure)
  1272     apply (subst Bochner_Integration.integral_diff[symmetric])
  1219     apply (subst Bochner_Integration.integral_diff[symmetric])
  1273       apply (auto intro!: integral_cong_AE simp: split_beta' simp del: Bochner_Integration.integral_diff)
  1220       apply (auto intro!: integral_cong_AE simp: split_beta' simp del: Bochner_Integration.integral_diff)
  1370       unfolding real_integrable_def
  1317       unfolding real_integrable_def
  1371       using fin neg by (auto simp: split_beta')
  1318       using fin neg by (auto simp: split_beta')
  1372     have "integrable (S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P) (\<lambda>x. Pxyz x * - log b (?f x))"
  1319     have "integrable (S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P) (\<lambda>x. Pxyz x * - log b (?f x))"
  1373       apply (rule integrable_cong_AE[THEN iffD1, OF _ _ _ I3])
  1320       apply (rule integrable_cong_AE[THEN iffD1, OF _ _ _ I3])
  1374       using Pz_nn Pxz_nn Pyz_nn Pxyz_nn ae2 ae3 ae4 
  1321       using Pz_nn Pxz_nn Pyz_nn Pxyz_nn ae2 ae3 ae4 
  1375       apply (auto simp: log_divide_nz log_mult_nz zero_le_mult_iff zero_less_mult_iff
  1322       by (auto simp: log_divide field_simps)
  1376           zero_less_divide_iff field_simps space_pair_measure less_le split: prod.split)
  1323     then show "integrable ?P (\<lambda>x. - log b (?f x))"
  1377     apply (intro AE_I2)
       
  1378       apply (auto simp: )
       
  1379       by (smt (verit, best) log_divide minus_mult_minus mult_minus_left no_zero_divisors)
       
  1380 
       
  1381     then
       
  1382     show "integrable ?P (\<lambda>x. - log b (?f x))"
       
  1383       using Pxyz_nn by (auto simp: integrable_real_density)
  1324       using Pxyz_nn by (auto simp: integrable_real_density)
  1384   qed (auto simp: b_gt_1 minus_log_convex)
  1325   qed (auto simp: b_gt_1 minus_log_convex)
  1385   also have "\<dots> = conditional_mutual_information b S T P X Y Z"
  1326   also have "\<dots> = conditional_mutual_information b S T P X Y Z"
  1386     unfolding \<open>?eq\<close>
  1327     unfolding \<open>?eq\<close>
  1387     using Pz_nn Pxz_nn Pyz_nn Pxyz_nn
  1328     using Pz_nn Pxz_nn Pyz_nn Pxyz_nn
  1389     apply simp
  1330     apply simp
  1390     apply simp
  1331     apply simp
  1391     apply simp
  1332     apply simp
  1392     apply (intro integral_cong_AE)
  1333     apply (intro integral_cong_AE)
  1393     using ae1 ae2 ae3 ae4 
  1334     using ae1 ae2 ae3 ae4 
  1394     apply (auto simp: log_divide_pos zero_less_mult_iff zero_less_divide_iff
  1335     by (auto simp: log_divide zero_less_mult_iff field_simps)
  1395                       field_simps space_pair_measure less_le integral_cong_AE split: prod.split)
       
  1396     apply (intro AE_I2)
       
  1397     by (metis divide_divide_eq_right log_recip mult_1 mult_minus_right)
       
  1398   finally show ?nonneg
  1336   finally show ?nonneg
  1399     by simp
  1337     by simp
  1400 qed
  1338 qed
  1401 
  1339 
  1402 lemma (in information_space) conditional_mutual_information_eq:
  1340 lemma (in information_space) conditional_mutual_information_eq: