author  wenzelm 
Thu, 18 Apr 2013 17:07:01 +0200  
changeset 51717  9e7d1c139569 
parent 50419  3177d0374701 
child 53015  a1119cf551e8 
permissions  rwrr 
42067  1 
(* Title: HOL/Probability/Information.thy 
2 
Author: Johannes Hölzl, TU München 

3 
Author: Armin Heller, TU München 

4 
*) 

5 

6 
header {*Information theory*} 

7 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

8 
theory Information 
41413
64cd30d6b0b8
explicit file specifications  avoid secondary load path;
wenzelm
parents:
41095
diff
changeset

9 
imports 
43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

10 
Independent_Family 
43556
0d78c8d31d0d
move conditional expectation to its own theory file
hoelzl
parents:
43340
diff
changeset

11 
Radon_Nikodym 
41413
64cd30d6b0b8
explicit file specifications  avoid secondary load path;
wenzelm
parents:
41095
diff
changeset

12 
"~~/src/HOL/Library/Convex" 
36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

13 
begin 
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

14 

39097  15 
lemma log_le: "1 < a \<Longrightarrow> 0 < x \<Longrightarrow> x \<le> y \<Longrightarrow> log a x \<le> log a y" 
16 
by (subst log_le_cancel_iff) auto 

17 

18 
lemma log_less: "1 < a \<Longrightarrow> 0 < x \<Longrightarrow> x < y \<Longrightarrow> log a x < log a y" 

19 
by (subst log_less_cancel_iff) auto 

20 

21 
lemma setsum_cartesian_product': 

22 
"(\<Sum>x\<in>A \<times> B. f x) = (\<Sum>x\<in>A. setsum (\<lambda>y. f (x, y)) B)" 

23 
unfolding setsum_cartesian_product by simp 

24 

25 
lemma split_pairs: 

40859  26 
"((A, B) = X) \<longleftrightarrow> (fst X = A \<and> snd X = B)" and 
27 
"(X = (A, B)) \<longleftrightarrow> (fst X = A \<and> snd X = B)" by auto 

38656  28 

29 
section "Information theory" 

30 

40859  31 
locale information_space = prob_space + 
38656  32 
fixes b :: real assumes b_gt_1: "1 < b" 
33 

40859  34 
context information_space 
38656  35 
begin 
36 

40859  37 
text {* Introduce some simplification rules for logarithm of base @{term b}. *} 
38 

39 
lemma log_neg_const: 

40 
assumes "x \<le> 0" 

41 
shows "log b x = log b 0" 

36624  42 
proof  
40859  43 
{ fix u :: real 
44 
have "x \<le> 0" by fact 

45 
also have "0 < exp u" 

46 
using exp_gt_zero . 

47 
finally have "exp u \<noteq> x" 

48 
by auto } 

49 
then show "log b x = log b 0" 

50 
by (simp add: log_def ln_def) 

38656  51 
qed 
52 

40859  53 
lemma log_mult_eq: 
54 
"log b (A * B) = (if 0 < A * B then log b \<bar>A\<bar> + log b \<bar>B\<bar> else log b 0)" 

55 
using log_mult[of b "\<bar>A\<bar>" "\<bar>B\<bar>"] b_gt_1 log_neg_const[of "A * B"] 

56 
by (auto simp: zero_less_mult_iff mult_le_0_iff) 

38656  57 

40859  58 
lemma log_inverse_eq: 
59 
"log b (inverse B) = (if 0 < B then  log b B else log b 0)" 

60 
using log_inverse[of b B] log_neg_const[of "inverse B"] b_gt_1 by simp 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

61 

40859  62 
lemma log_divide_eq: 
63 
"log b (A / B) = (if 0 < A * B then log b \<bar>A\<bar>  log b \<bar>B\<bar> else log b 0)" 

64 
unfolding divide_inverse log_mult_eq log_inverse_eq abs_inverse 

65 
by (auto simp: zero_less_mult_iff mult_le_0_iff) 

38656  66 

40859  67 
lemmas log_simps = log_mult_eq log_inverse_eq log_divide_eq 
38656  68 

69 
end 

70 

39097  71 
subsection "Kullback$$Leibler divergence" 
36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

72 

39097  73 
text {* The Kullback$$Leibler divergence is also known as relative entropy or 
74 
Kullback$$Leibler distance. *} 

75 

76 
definition 

47694  77 
"entropy_density b M N = log b \<circ> real \<circ> RN_deriv M N" 
43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

78 

60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

79 
definition 
47694  80 
"KL_divergence b M N = integral\<^isup>L N (entropy_density b M N)" 
43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

81 

60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

82 
lemma (in information_space) measurable_entropy_density: 
47694  83 
assumes ac: "absolutely_continuous M N" "sets N = events" 
84 
shows "entropy_density b M N \<in> borel_measurable M" 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

85 
proof  
47694  86 
from borel_measurable_RN_deriv[OF ac] b_gt_1 show ?thesis 
50003  87 
unfolding entropy_density_def by auto 
43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

88 
qed 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

89 

50003  90 
lemma borel_measurable_RN_deriv_density[measurable (raw)]: 
91 
"f \<in> borel_measurable M \<Longrightarrow> RN_deriv M (density M f) \<in> borel_measurable M" 

92 
using borel_measurable_RN_deriv_density[of "\<lambda>x. max 0 (f x )" M] 

93 
by (simp add: density_max_0[symmetric]) 

94 

47694  95 
lemma (in sigma_finite_measure) KL_density: 
96 
fixes f :: "'a \<Rightarrow> real" 

97 
assumes "1 < b" 

98 
assumes f: "f \<in> borel_measurable M" "AE x in M. 0 \<le> f x" 

99 
shows "KL_divergence b M (density M f) = (\<integral>x. f x * log b (f x) \<partial>M)" 

100 
unfolding KL_divergence_def 

101 
proof (subst integral_density) 

102 
show "entropy_density b M (density M (\<lambda>x. ereal (f x))) \<in> borel_measurable M" 

49776  103 
using f 
50003  104 
by (auto simp: comp_def entropy_density_def) 
47694  105 
have "density M (RN_deriv M (density M f)) = density M f" 
106 
using f by (intro density_RN_deriv_density) auto 

107 
then have eq: "AE x in M. RN_deriv M (density M f) x = f x" 

108 
using f 

109 
by (intro density_unique) 

110 
(auto intro!: borel_measurable_log borel_measurable_RN_deriv_density simp: RN_deriv_density_nonneg) 

111 
show "(\<integral>x. f x * entropy_density b M (density M (\<lambda>x. ereal (f x))) x \<partial>M) = (\<integral>x. f x * log b (f x) \<partial>M)" 

112 
apply (intro integral_cong_AE) 

113 
using eq 

114 
apply eventually_elim 

115 
apply (auto simp: entropy_density_def) 

116 
done 

117 
qed fact+ 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

118 

47694  119 
lemma (in sigma_finite_measure) KL_density_density: 
120 
fixes f g :: "'a \<Rightarrow> real" 

121 
assumes "1 < b" 

122 
assumes f: "f \<in> borel_measurable M" "AE x in M. 0 \<le> f x" 

123 
assumes g: "g \<in> borel_measurable M" "AE x in M. 0 \<le> g x" 

124 
assumes ac: "AE x in M. f x = 0 \<longrightarrow> g x = 0" 

125 
shows "KL_divergence b (density M f) (density M g) = (\<integral>x. g x * log b (g x / f x) \<partial>M)" 

126 
proof  

127 
interpret Mf: sigma_finite_measure "density M f" 

128 
using f by (subst sigma_finite_iff_density_finite) auto 

129 
have "KL_divergence b (density M f) (density M g) = 

130 
KL_divergence b (density M f) (density (density M f) (\<lambda>x. g x / f x))" 

131 
using f g ac by (subst density_density_divide) simp_all 

132 
also have "\<dots> = (\<integral>x. (g x / f x) * log b (g x / f x) \<partial>density M f)" 

133 
using f g `1 < b` by (intro Mf.KL_density) (auto simp: AE_density divide_nonneg_nonneg) 

134 
also have "\<dots> = (\<integral>x. g x * log b (g x / f x) \<partial>M)" 

135 
using ac f g `1 < b` by (subst integral_density) (auto intro!: integral_cong_AE) 

136 
finally show ?thesis . 

137 
qed 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

138 

47694  139 
lemma (in information_space) KL_gt_0: 
140 
fixes D :: "'a \<Rightarrow> real" 

141 
assumes "prob_space (density M D)" 

142 
assumes D: "D \<in> borel_measurable M" "AE x in M. 0 \<le> D x" 

143 
assumes int: "integrable M (\<lambda>x. D x * log b (D x))" 

144 
assumes A: "density M D \<noteq> M" 

145 
shows "0 < KL_divergence b M (density M D)" 

146 
proof  

147 
interpret N: prob_space "density M D" by fact 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

148 

47694  149 
obtain A where "A \<in> sets M" "emeasure (density M D) A \<noteq> emeasure M A" 
150 
using measure_eqI[of "density M D" M] `density M D \<noteq> M` by auto 

151 

152 
let ?D_set = "{x\<in>space M. D x \<noteq> 0}" 

153 
have [simp, intro]: "?D_set \<in> sets M" 

154 
using D by auto 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

155 

43920  156 
have D_neg: "(\<integral>\<^isup>+ x. ereal ( D x) \<partial>M) = 0" 
43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

157 
using D by (subst positive_integral_0_iff_AE) auto 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

158 

47694  159 
have "(\<integral>\<^isup>+ x. ereal (D x) \<partial>M) = emeasure (density M D) (space M)" 
160 
using D by (simp add: emeasure_density cong: positive_integral_cong) 

43920  161 
then have D_pos: "(\<integral>\<^isup>+ x. ereal (D x) \<partial>M) = 1" 
47694  162 
using N.emeasure_space_1 by simp 
43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

163 

47694  164 
have "integrable M D" "integral\<^isup>L M D = 1" 
165 
using D D_pos D_neg unfolding integrable_def lebesgue_integral_def by simp_all 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

166 

47694  167 
have "0 \<le> 1  measure M ?D_set" 
43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

168 
using prob_le_1 by (auto simp: field_simps) 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

169 
also have "\<dots> = (\<integral> x. D x  indicator ?D_set x \<partial>M)" 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

170 
using `integrable M D` `integral\<^isup>L M D = 1` 
47694  171 
by (simp add: emeasure_eq_measure) 
172 
also have "\<dots> < (\<integral> x. D x * (ln b * log b (D x)) \<partial>M)" 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

173 
proof (rule integral_less_AE) 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

174 
show "integrable M (\<lambda>x. D x  indicator ?D_set x)" 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

175 
using `integrable M D` 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

176 
by (intro integral_diff integral_indicator) auto 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

177 
next 
47694  178 
from integral_cmult(1)[OF int, of "ln b"] 
179 
show "integrable M (\<lambda>x. D x * (ln b * log b (D x)))" 

180 
by (simp add: ac_simps) 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

181 
next 
47694  182 
show "emeasure M {x\<in>space M. D x \<noteq> 1 \<and> D x \<noteq> 0} \<noteq> 0" 
43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

183 
proof 
47694  184 
assume eq_0: "emeasure M {x\<in>space M. D x \<noteq> 1 \<and> D x \<noteq> 0} = 0" 
185 
then have disj: "AE x in M. D x = 1 \<or> D x = 0" 

50244
de72bbe42190
qualified interpretation of sigma_algebra, to avoid name clashes
immler
parents:
50003
diff
changeset

186 
using D(1) by (auto intro!: AE_I[OF subset_refl] sets.sets_Collect) 
43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

187 

47694  188 
have "emeasure M {x\<in>space M. D x = 1} = (\<integral>\<^isup>+ x. indicator {x\<in>space M. D x = 1} x \<partial>M)" 
43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

189 
using D(1) by auto 
47694  190 
also have "\<dots> = (\<integral>\<^isup>+ x. ereal (D x) \<partial>M)" 
43920  191 
using disj by (auto intro!: positive_integral_cong_AE simp: indicator_def one_ereal_def) 
47694  192 
finally have "AE x in M. D x = 1" 
193 
using D D_pos by (intro AE_I_eq_1) auto 

43920  194 
then have "(\<integral>\<^isup>+x. indicator A x\<partial>M) = (\<integral>\<^isup>+x. ereal (D x) * indicator A x\<partial>M)" 
195 
by (intro positive_integral_cong_AE) (auto simp: one_ereal_def[symmetric]) 

47694  196 
also have "\<dots> = density M D A" 
197 
using `A \<in> sets M` D by (simp add: emeasure_density) 

198 
finally show False using `A \<in> sets M` `emeasure (density M D) A \<noteq> emeasure M A` by simp 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

199 
qed 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

200 
show "{x\<in>space M. D x \<noteq> 1 \<and> D x \<noteq> 0} \<in> sets M" 
50244
de72bbe42190
qualified interpretation of sigma_algebra, to avoid name clashes
immler
parents:
50003
diff
changeset

201 
using D(1) by (auto intro: sets.sets_Collect_conj) 
43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

202 

47694  203 
show "AE t in M. t \<in> {x\<in>space M. D x \<noteq> 1 \<and> D x \<noteq> 0} \<longrightarrow> 
204 
D t  indicator ?D_set t \<noteq> D t * (ln b * log b (D t))" 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

205 
using D(2) 
47694  206 
proof (eventually_elim, safe) 
207 
fix t assume Dt: "t \<in> space M" "D t \<noteq> 1" "D t \<noteq> 0" "0 \<le> D t" 

208 
and eq: "D t  indicator ?D_set t = D t * (ln b * log b (D t))" 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

209 

60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

210 
have "D t  1 = D t  indicator ?D_set t" 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

211 
using Dt by simp 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

212 
also note eq 
47694  213 
also have "D t * (ln b * log b (D t)) =  D t * ln (1 / D t)" 
214 
using b_gt_1 `D t \<noteq> 0` `0 \<le> D t` 

215 
by (simp add: log_def ln_div less_le) 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

216 
finally have "ln (1 / D t) = 1 / D t  1" 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

217 
using `D t \<noteq> 0` by (auto simp: field_simps) 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

218 
from ln_eq_minus_one[OF _ this] `D t \<noteq> 0` `0 \<le> D t` `D t \<noteq> 1` 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

219 
show False by auto 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

220 
qed 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

221 

47694  222 
show "AE t in M. D t  indicator ?D_set t \<le> D t * (ln b * log b (D t))" 
223 
using D(2) AE_space 

224 
proof eventually_elim 

225 
fix t assume "t \<in> space M" "0 \<le> D t" 

226 
show "D t  indicator ?D_set t \<le> D t * (ln b * log b (D t))" 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

227 
proof cases 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

228 
assume asm: "D t \<noteq> 0" 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

229 
then have "0 < D t" using `0 \<le> D t` by auto 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

230 
then have "0 < 1 / D t" by auto 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

231 
have "D t  indicator ?D_set t \<le>  D t * (1 / D t  1)" 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

232 
using asm `t \<in> space M` by (simp add: field_simps) 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

233 
also have " D t * (1 / D t  1) \<le>  D t * ln (1 / D t)" 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

234 
using ln_le_minus_one `0 < 1 / D t` by (intro mult_left_mono_neg) auto 
47694  235 
also have "\<dots> = D t * (ln b * log b (D t))" 
236 
using `0 < D t` b_gt_1 

237 
by (simp_all add: log_def ln_div) 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

238 
finally show ?thesis by simp 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

239 
qed simp 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

240 
qed 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

241 
qed 
47694  242 
also have "\<dots> = (\<integral> x. ln b * (D x * log b (D x)) \<partial>M)" 
243 
by (simp add: ac_simps) 

244 
also have "\<dots> = ln b * (\<integral> x. D x * log b (D x) \<partial>M)" 

245 
using int by (rule integral_cmult) 

246 
finally show ?thesis 

247 
using b_gt_1 D by (subst KL_density) (auto simp: zero_less_mult_iff) 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

248 
qed 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

249 

47694  250 
lemma (in sigma_finite_measure) KL_same_eq_0: "KL_divergence b M M = 0" 
43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

251 
proof  
47694  252 
have "AE x in M. 1 = RN_deriv M M x" 
43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

253 
proof (rule RN_deriv_unique) 
47694  254 
show "(\<lambda>x. 1) \<in> borel_measurable M" "AE x in M. 0 \<le> (1 :: ereal)" by auto 
255 
show "density M (\<lambda>x. 1) = M" 

256 
apply (auto intro!: measure_eqI emeasure_density) 

257 
apply (subst emeasure_density) 

258 
apply auto 

259 
done 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

260 
qed 
47694  261 
then have "AE x in M. log b (real (RN_deriv M M x)) = 0" 
43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

262 
by (elim AE_mp) simp 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

263 
from integral_cong_AE[OF this] 
47694  264 
have "integral\<^isup>L M (entropy_density b M M) = 0" 
43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

265 
by (simp add: entropy_density_def comp_def) 
47694  266 
then show "KL_divergence b M M = 0" 
43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

267 
unfolding KL_divergence_def 
47694  268 
by auto 
43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

269 
qed 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

270 

47694  271 
lemma (in information_space) KL_eq_0_iff_eq: 
272 
fixes D :: "'a \<Rightarrow> real" 

273 
assumes "prob_space (density M D)" 

274 
assumes D: "D \<in> borel_measurable M" "AE x in M. 0 \<le> D x" 

275 
assumes int: "integrable M (\<lambda>x. D x * log b (D x))" 

276 
shows "KL_divergence b M (density M D) = 0 \<longleftrightarrow> density M D = M" 

277 
using KL_same_eq_0[of b] KL_gt_0[OF assms] 

278 
by (auto simp: less_le) 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

279 

47694  280 
lemma (in information_space) KL_eq_0_iff_eq_ac: 
281 
fixes D :: "'a \<Rightarrow> real" 

282 
assumes "prob_space N" 

283 
assumes ac: "absolutely_continuous M N" "sets N = sets M" 

284 
assumes int: "integrable N (entropy_density b M N)" 

285 
shows "KL_divergence b M N = 0 \<longleftrightarrow> N = M" 

41833
563bea92b2c0
add lemma KL_divergence_vimage, mutual_information_generic
hoelzl
parents:
41689
diff
changeset

286 
proof  
47694  287 
interpret N: prob_space N by fact 
288 
have "finite_measure N" by unfold_locales 

289 
from real_RN_deriv[OF this ac] guess D . note D = this 

290 

291 
have "N = density M (RN_deriv M N)" 

292 
using ac by (rule density_RN_deriv[symmetric]) 

293 
also have "\<dots> = density M D" 

294 
using borel_measurable_RN_deriv[OF ac] D by (auto intro!: density_cong) 

295 
finally have N: "N = density M D" . 

41833
563bea92b2c0
add lemma KL_divergence_vimage, mutual_information_generic
hoelzl
parents:
41689
diff
changeset

296 

47694  297 
from absolutely_continuous_AE[OF ac(2,1) D(2)] D b_gt_1 ac measurable_entropy_density 
298 
have "integrable N (\<lambda>x. log b (D x))" 

299 
by (intro integrable_cong_AE[THEN iffD2, OF _ _ _ int]) 

300 
(auto simp: N entropy_density_def) 

301 
with D b_gt_1 have "integrable M (\<lambda>x. D x * log b (D x))" 

302 
by (subst integral_density(2)[symmetric]) (auto simp: N[symmetric] comp_def) 

303 
with `prob_space N` D show ?thesis 

304 
unfolding N 

305 
by (intro KL_eq_0_iff_eq) auto 

41833
563bea92b2c0
add lemma KL_divergence_vimage, mutual_information_generic
hoelzl
parents:
41689
diff
changeset

306 
qed 
563bea92b2c0
add lemma KL_divergence_vimage, mutual_information_generic
hoelzl
parents:
41689
diff
changeset

307 

47694  308 
lemma (in information_space) KL_nonneg: 
309 
assumes "prob_space (density M D)" 

310 
assumes D: "D \<in> borel_measurable M" "AE x in M. 0 \<le> D x" 

311 
assumes int: "integrable M (\<lambda>x. D x * log b (D x))" 

312 
shows "0 \<le> KL_divergence b M (density M D)" 

313 
using KL_gt_0[OF assms] by (cases "density M D = M") (auto simp: KL_same_eq_0) 

40859  314 

47694  315 
lemma (in sigma_finite_measure) KL_density_density_nonneg: 
316 
fixes f g :: "'a \<Rightarrow> real" 

317 
assumes "1 < b" 

318 
assumes f: "f \<in> borel_measurable M" "AE x in M. 0 \<le> f x" "prob_space (density M f)" 

319 
assumes g: "g \<in> borel_measurable M" "AE x in M. 0 \<le> g x" "prob_space (density M g)" 

320 
assumes ac: "AE x in M. f x = 0 \<longrightarrow> g x = 0" 

321 
assumes int: "integrable M (\<lambda>x. g x * log b (g x / f x))" 

322 
shows "0 \<le> KL_divergence b (density M f) (density M g)" 

323 
proof  

324 
interpret Mf: prob_space "density M f" by fact 

325 
interpret Mf: information_space "density M f" b by default fact 

326 
have eq: "density (density M f) (\<lambda>x. g x / f x) = density M g" (is "?DD = _") 

327 
using f g ac by (subst density_density_divide) simp_all 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

328 

47694  329 
have "0 \<le> KL_divergence b (density M f) (density (density M f) (\<lambda>x. g x / f x))" 
330 
proof (rule Mf.KL_nonneg) 

331 
show "prob_space ?DD" unfolding eq by fact 

332 
from f g show "(\<lambda>x. g x / f x) \<in> borel_measurable (density M f)" 

333 
by auto 

334 
show "AE x in density M f. 0 \<le> g x / f x" 

335 
using f g by (auto simp: AE_density divide_nonneg_nonneg) 

336 
show "integrable (density M f) (\<lambda>x. g x / f x * log b (g x / f x))" 

337 
using `1 < b` f g ac 

338 
by (subst integral_density) 

339 
(auto intro!: integrable_cong_AE[THEN iffD2, OF _ _ _ int] measurable_If) 

340 
qed 

341 
also have "\<dots> = KL_divergence b (density M f) (density M g)" 

342 
using f g ac by (subst density_density_divide) simp_all 

343 
finally show ?thesis . 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

344 
qed 
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

345 

49803  346 
subsection {* Finite Entropy *} 
347 

348 
definition (in information_space) 

349 
"finite_entropy S X f \<longleftrightarrow> distributed M S X f \<and> integrable S (\<lambda>x. f x * log b (f x))" 

350 

351 
lemma (in information_space) finite_entropy_simple_function: 

352 
assumes X: "simple_function M X" 

353 
shows "finite_entropy (count_space (X`space M)) X (\<lambda>a. measure M {x \<in> space M. X x = a})" 

354 
unfolding finite_entropy_def 

355 
proof 

356 
have [simp]: "finite (X ` space M)" 

357 
using X by (auto simp: simple_function_def) 

358 
then show "integrable (count_space (X ` space M)) 

359 
(\<lambda>x. prob {xa \<in> space M. X xa = x} * log b (prob {xa \<in> space M. X xa = x}))" 

360 
by (rule integrable_count_space) 

361 
have d: "distributed M (count_space (X ` space M)) X (\<lambda>x. ereal (if x \<in> X`space M then prob {xa \<in> space M. X xa = x} else 0))" 

362 
by (rule distributed_simple_function_superset[OF X]) (auto intro!: arg_cong[where f=prob]) 

363 
show "distributed M (count_space (X ` space M)) X (\<lambda>x. ereal (prob {xa \<in> space M. X xa = x}))" 

364 
by (rule distributed_cong_density[THEN iffD1, OF _ _ _ d]) auto 

365 
qed 

366 

367 
lemma distributed_transform_AE: 

368 
assumes T: "T \<in> measurable P Q" "absolutely_continuous Q (distr P Q T)" 

369 
assumes g: "distributed M Q Y g" 

370 
shows "AE x in P. 0 \<le> g (T x)" 

371 
using g 

372 
apply (subst AE_distr_iff[symmetric, OF T(1)]) 

50003  373 
apply simp 
49803  374 
apply (rule absolutely_continuous_AE[OF _ T(2)]) 
375 
apply simp 

376 
apply (simp add: distributed_AE) 

377 
done 

378 

379 
lemma ac_fst: 

380 
assumes "sigma_finite_measure T" 

381 
shows "absolutely_continuous S (distr (S \<Otimes>\<^isub>M T) S fst)" 

382 
proof  

383 
interpret sigma_finite_measure T by fact 

384 
{ fix A assume "A \<in> sets S" "emeasure S A = 0" 

385 
moreover then have "fst ` A \<inter> space (S \<Otimes>\<^isub>M T) = A \<times> space T" 

50244
de72bbe42190
qualified interpretation of sigma_algebra, to avoid name clashes
immler
parents:
50003
diff
changeset

386 
by (auto simp: space_pair_measure dest!: sets.sets_into_space) 
49803  387 
ultimately have "emeasure (S \<Otimes>\<^isub>M T) (fst ` A \<inter> space (S \<Otimes>\<^isub>M T)) = 0" 
388 
by (simp add: emeasure_pair_measure_Times) } 

389 
then show ?thesis 

390 
unfolding absolutely_continuous_def 

391 
apply (auto simp: null_sets_distr_iff) 

392 
apply (auto simp: null_sets_def intro!: measurable_sets) 

393 
done 

394 
qed 

395 

396 
lemma ac_snd: 

397 
assumes "sigma_finite_measure T" 

398 
shows "absolutely_continuous T (distr (S \<Otimes>\<^isub>M T) T snd)" 

399 
proof  

400 
interpret sigma_finite_measure T by fact 

401 
{ fix A assume "A \<in> sets T" "emeasure T A = 0" 

402 
moreover then have "snd ` A \<inter> space (S \<Otimes>\<^isub>M T) = space S \<times> A" 

50244
de72bbe42190
qualified interpretation of sigma_algebra, to avoid name clashes
immler
parents:
50003
diff
changeset

403 
by (auto simp: space_pair_measure dest!: sets.sets_into_space) 
49803  404 
ultimately have "emeasure (S \<Otimes>\<^isub>M T) (snd ` A \<inter> space (S \<Otimes>\<^isub>M T)) = 0" 
405 
by (simp add: emeasure_pair_measure_Times) } 

406 
then show ?thesis 

407 
unfolding absolutely_continuous_def 

408 
apply (auto simp: null_sets_distr_iff) 

409 
apply (auto simp: null_sets_def intro!: measurable_sets) 

410 
done 

411 
qed 

412 

413 
lemma distributed_integrable: 

414 
"distributed M N X f \<Longrightarrow> g \<in> borel_measurable N \<Longrightarrow> 

415 
integrable N (\<lambda>x. f x * g x) \<longleftrightarrow> integrable M (\<lambda>x. g (X x))" 

50003  416 
by (auto simp: distributed_real_AE 
49803  417 
distributed_distr_eq_density[symmetric] integral_density[symmetric] integrable_distr_eq) 
418 

419 
lemma distributed_transform_integrable: 

420 
assumes Px: "distributed M N X Px" 

421 
assumes "distributed M P Y Py" 

422 
assumes Y: "Y = (\<lambda>x. T (X x))" and T: "T \<in> measurable N P" and f: "f \<in> borel_measurable P" 

423 
shows "integrable P (\<lambda>x. Py x * f x) \<longleftrightarrow> integrable N (\<lambda>x. Px x * f (T x))" 

424 
proof  

425 
have "integrable P (\<lambda>x. Py x * f x) \<longleftrightarrow> integrable M (\<lambda>x. f (Y x))" 

426 
by (rule distributed_integrable) fact+ 

427 
also have "\<dots> \<longleftrightarrow> integrable M (\<lambda>x. f (T (X x)))" 

428 
using Y by simp 

429 
also have "\<dots> \<longleftrightarrow> integrable N (\<lambda>x. Px x * f (T x))" 

430 
using measurable_comp[OF T f] Px by (intro distributed_integrable[symmetric]) (auto simp: comp_def) 

431 
finally show ?thesis . 

432 
qed 

433 

434 
lemma integrable_cong_AE_imp: "integrable M g \<Longrightarrow> f \<in> borel_measurable M \<Longrightarrow> (AE x in M. g x = f x) \<Longrightarrow> integrable M f" 

435 
using integrable_cong_AE by blast 

436 

437 
lemma (in information_space) finite_entropy_integrable: 

438 
"finite_entropy S X Px \<Longrightarrow> integrable S (\<lambda>x. Px x * log b (Px x))" 

439 
unfolding finite_entropy_def by auto 

440 

441 
lemma (in information_space) finite_entropy_distributed: 

442 
"finite_entropy S X Px \<Longrightarrow> distributed M S X Px" 

443 
unfolding finite_entropy_def by auto 

444 

445 
lemma (in information_space) finite_entropy_integrable_transform: 

446 
assumes Fx: "finite_entropy S X Px" 

447 
assumes Fy: "distributed M T Y Py" 

448 
and "X = (\<lambda>x. f (Y x))" 

449 
and "f \<in> measurable T S" 

450 
shows "integrable T (\<lambda>x. Py x * log b (Px (f x)))" 

451 
using assms unfolding finite_entropy_def 

452 
using distributed_transform_integrable[of M T Y Py S X Px f "\<lambda>x. log b (Px x)"] 

50003  453 
by auto 
49803  454 

39097  455 
subsection {* Mutual Information *} 
456 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

457 
definition (in prob_space) 
38656  458 
"mutual_information b S T X Y = 
47694  459 
KL_divergence b (distr M S X \<Otimes>\<^isub>M distr M T Y) (distr M (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x)))" 
36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

460 

47694  461 
lemma (in information_space) mutual_information_indep_vars: 
43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

462 
fixes S T X Y 
47694  463 
defines "P \<equiv> distr M S X \<Otimes>\<^isub>M distr M T Y" 
464 
defines "Q \<equiv> distr M (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x))" 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

465 
shows "indep_var S X T Y \<longleftrightarrow> 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

466 
(random_variable S X \<and> random_variable T Y \<and> 
47694  467 
absolutely_continuous P Q \<and> integrable Q (entropy_density b P Q) \<and> 
468 
mutual_information b S T X Y = 0)" 

469 
unfolding indep_var_distribution_eq 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

470 
proof safe 
50003  471 
assume rv[measurable]: "random_variable S X" "random_variable T Y" 
43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

472 

47694  473 
interpret X: prob_space "distr M S X" 
474 
by (rule prob_space_distr) fact 

475 
interpret Y: prob_space "distr M T Y" 

476 
by (rule prob_space_distr) fact 

477 
interpret XY: pair_prob_space "distr M S X" "distr M T Y" by default 

478 
interpret P: information_space P b unfolding P_def by default (rule b_gt_1) 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

479 

47694  480 
interpret Q: prob_space Q unfolding Q_def 
50003  481 
by (rule prob_space_distr) simp 
43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

482 

47694  483 
{ assume "distr M S X \<Otimes>\<^isub>M distr M T Y = distr M (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x))" 
484 
then have [simp]: "Q = P" unfolding Q_def P_def by simp 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

485 

47694  486 
show ac: "absolutely_continuous P Q" by (simp add: absolutely_continuous_def) 
487 
then have ed: "entropy_density b P Q \<in> borel_measurable P" 

488 
by (rule P.measurable_entropy_density) simp 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

489 

47694  490 
have "AE x in P. 1 = RN_deriv P Q x" 
491 
proof (rule P.RN_deriv_unique) 

492 
show "density P (\<lambda>x. 1) = Q" 

493 
unfolding `Q = P` by (intro measure_eqI) (auto simp: emeasure_density) 

494 
qed auto 

495 
then have ae_0: "AE x in P. entropy_density b P Q x = 0" 

496 
by eventually_elim (auto simp: entropy_density_def) 

497 
then have "integrable P (entropy_density b P Q) \<longleftrightarrow> integrable Q (\<lambda>x. 0)" 

498 
using ed unfolding `Q = P` by (intro integrable_cong_AE) auto 

499 
then show "integrable Q (entropy_density b P Q)" by simp 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

500 

47694  501 
show "mutual_information b S T X Y = 0" 
502 
unfolding mutual_information_def KL_divergence_def P_def[symmetric] Q_def[symmetric] `Q = P` 

503 
using ae_0 by (simp cong: integral_cong_AE) } 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

504 

47694  505 
{ assume ac: "absolutely_continuous P Q" 
506 
assume int: "integrable Q (entropy_density b P Q)" 

507 
assume I_eq_0: "mutual_information b S T X Y = 0" 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

508 

47694  509 
have eq: "Q = P" 
510 
proof (rule P.KL_eq_0_iff_eq_ac[THEN iffD1]) 

511 
show "prob_space Q" by unfold_locales 

512 
show "absolutely_continuous P Q" by fact 

513 
show "integrable Q (entropy_density b P Q)" by fact 

514 
show "sets Q = sets P" by (simp add: P_def Q_def sets_pair_measure) 

515 
show "KL_divergence b P Q = 0" 

516 
using I_eq_0 unfolding mutual_information_def by (simp add: P_def Q_def) 

517 
qed 

518 
then show "distr M S X \<Otimes>\<^isub>M distr M T Y = distr M (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x))" 

519 
unfolding P_def Q_def .. } 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

520 
qed 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

521 

40859  522 
abbreviation (in information_space) 
523 
mutual_information_Pow ("\<I>'(_ ; _')") where 

47694  524 
"\<I>(X ; Y) \<equiv> mutual_information b (count_space (X`space M)) (count_space (Y`space M)) X Y" 
41689
3e39b0e730d6
the measure valuation is again part of the measure_space type, instead of an explicit parameter to the locale;
hoelzl
parents:
41661
diff
changeset

525 

47694  526 
lemma (in information_space) 
527 
fixes Pxy :: "'b \<times> 'c \<Rightarrow> real" and Px :: "'b \<Rightarrow> real" and Py :: "'c \<Rightarrow> real" 

49803  528 
assumes S: "sigma_finite_measure S" and T: "sigma_finite_measure T" 
529 
assumes Fx: "finite_entropy S X Px" and Fy: "finite_entropy T Y Py" 

530 
assumes Fxy: "finite_entropy (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x)) Pxy" 

531 
defines "f \<equiv> \<lambda>x. Pxy x * log b (Pxy x / (Px (fst x) * Py (snd x)))" 

532 
shows mutual_information_distr': "mutual_information b S T X Y = integral\<^isup>L (S \<Otimes>\<^isub>M T) f" (is "?M = ?R") 

533 
and mutual_information_nonneg': "0 \<le> mutual_information b S T X Y" 

534 
proof  

535 
have Px: "distributed M S X Px" 

536 
using Fx by (auto simp: finite_entropy_def) 

537 
have Py: "distributed M T Y Py" 

538 
using Fy by (auto simp: finite_entropy_def) 

539 
have Pxy: "distributed M (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x)) Pxy" 

540 
using Fxy by (auto simp: finite_entropy_def) 

541 

542 
have X: "random_variable S X" 

50003  543 
using Px by auto 
49803  544 
have Y: "random_variable T Y" 
50003  545 
using Py by auto 
49803  546 
interpret S: sigma_finite_measure S by fact 
547 
interpret T: sigma_finite_measure T by fact 

548 
interpret ST: pair_sigma_finite S T .. 

549 
interpret X: prob_space "distr M S X" using X by (rule prob_space_distr) 

550 
interpret Y: prob_space "distr M T Y" using Y by (rule prob_space_distr) 

551 
interpret XY: pair_prob_space "distr M S X" "distr M T Y" .. 

552 
let ?P = "S \<Otimes>\<^isub>M T" 

553 
let ?D = "distr M ?P (\<lambda>x. (X x, Y x))" 

554 

555 
{ fix A assume "A \<in> sets S" 

556 
with X Y have "emeasure (distr M S X) A = emeasure ?D (A \<times> space T)" 

557 
by (auto simp: emeasure_distr measurable_Pair measurable_space 

558 
intro!: arg_cong[where f="emeasure M"]) } 

559 
note marginal_eq1 = this 

560 
{ fix A assume "A \<in> sets T" 

561 
with X Y have "emeasure (distr M T Y) A = emeasure ?D (space S \<times> A)" 

562 
by (auto simp: emeasure_distr measurable_Pair measurable_space 

563 
intro!: arg_cong[where f="emeasure M"]) } 

564 
note marginal_eq2 = this 

565 

566 
have eq: "(\<lambda>x. ereal (Px (fst x) * Py (snd x))) = (\<lambda>(x, y). ereal (Px x) * ereal (Py y))" 

567 
by auto 

568 

569 
have distr_eq: "distr M S X \<Otimes>\<^isub>M distr M T Y = density ?P (\<lambda>x. ereal (Px (fst x) * Py (snd x)))" 

570 
unfolding Px(1)[THEN distributed_distr_eq_density] Py(1)[THEN distributed_distr_eq_density] eq 

571 
proof (subst pair_measure_density) 

572 
show "(\<lambda>x. ereal (Px x)) \<in> borel_measurable S" "(\<lambda>y. ereal (Py y)) \<in> borel_measurable T" 

573 
"AE x in S. 0 \<le> ereal (Px x)" "AE y in T. 0 \<le> ereal (Py y)" 

574 
using Px Py by (auto simp: distributed_def) 

575 
show "sigma_finite_measure (density T Py)" unfolding Py(1)[THEN distributed_distr_eq_density, symmetric] .. 

576 
qed (fact  simp)+ 

577 

578 
have M: "?M = KL_divergence b (density ?P (\<lambda>x. ereal (Px (fst x) * Py (snd x)))) (density ?P (\<lambda>x. ereal (Pxy x)))" 

579 
unfolding mutual_information_def distr_eq Pxy(1)[THEN distributed_distr_eq_density] .. 

580 

581 
from Px Py have f: "(\<lambda>x. Px (fst x) * Py (snd x)) \<in> borel_measurable ?P" 

582 
by (intro borel_measurable_times) (auto intro: distributed_real_measurable measurable_fst'' measurable_snd'') 

583 
have PxPy_nonneg: "AE x in ?P. 0 \<le> Px (fst x) * Py (snd x)" 

584 
proof (rule ST.AE_pair_measure) 

585 
show "{x \<in> space ?P. 0 \<le> Px (fst x) * Py (snd x)} \<in> sets ?P" 

586 
using f by auto 

587 
show "AE x in S. AE y in T. 0 \<le> Px (fst (x, y)) * Py (snd (x, y))" 

588 
using Px Py by (auto simp: zero_le_mult_iff dest!: distributed_real_AE) 

589 
qed 

590 

591 
have "(AE x in ?P. Px (fst x) = 0 \<longrightarrow> Pxy x = 0)" 

592 
by (rule subdensity_real[OF measurable_fst Pxy Px]) auto 

593 
moreover 

594 
have "(AE x in ?P. Py (snd x) = 0 \<longrightarrow> Pxy x = 0)" 

595 
by (rule subdensity_real[OF measurable_snd Pxy Py]) auto 

596 
ultimately have ac: "AE x in ?P. Px (fst x) * Py (snd x) = 0 \<longrightarrow> Pxy x = 0" 

597 
by eventually_elim auto 

598 

599 
show "?M = ?R" 

600 
unfolding M f_def 

601 
using b_gt_1 f PxPy_nonneg Pxy[THEN distributed_real_measurable] Pxy[THEN distributed_real_AE] ac 

602 
by (rule ST.KL_density_density) 

603 

604 
have X: "X = fst \<circ> (\<lambda>x. (X x, Y x))" and Y: "Y = snd \<circ> (\<lambda>x. (X x, Y x))" 

605 
by auto 

606 

607 
have "integrable (S \<Otimes>\<^isub>M T) (\<lambda>x. Pxy x * log b (Pxy x)  Pxy x * log b (Px (fst x))  Pxy x * log b (Py (snd x)))" 

608 
using finite_entropy_integrable[OF Fxy] 

609 
using finite_entropy_integrable_transform[OF Fx Pxy, of fst] 

610 
using finite_entropy_integrable_transform[OF Fy Pxy, of snd] 

611 
by simp 

612 
moreover have "f \<in> borel_measurable (S \<Otimes>\<^isub>M T)" 

613 
unfolding f_def using Px Py Pxy 

614 
by (auto intro: distributed_real_measurable measurable_fst'' measurable_snd'' 

615 
intro!: borel_measurable_times borel_measurable_log borel_measurable_divide) 

616 
ultimately have int: "integrable (S \<Otimes>\<^isub>M T) f" 

617 
apply (rule integrable_cong_AE_imp) 

618 
using 

619 
distributed_transform_AE[OF measurable_fst ac_fst, of T, OF T Px] 

620 
distributed_transform_AE[OF measurable_snd ac_snd, of _ _ _ _ S, OF T Py] 

621 
subdensity_real[OF measurable_fst Pxy Px X] 

622 
subdensity_real[OF measurable_snd Pxy Py Y] 

623 
distributed_real_AE[OF Pxy] 

624 
by eventually_elim 

625 
(auto simp: f_def log_divide_eq log_mult_eq field_simps zero_less_mult_iff mult_nonneg_nonneg) 

626 

627 
show "0 \<le> ?M" unfolding M 

628 
proof (rule ST.KL_density_density_nonneg 

629 
[OF b_gt_1 f PxPy_nonneg _ Pxy[THEN distributed_real_measurable] Pxy[THEN distributed_real_AE] _ ac int[unfolded f_def]]) 

630 
show "prob_space (density (S \<Otimes>\<^isub>M T) (\<lambda>x. ereal (Pxy x))) " 

631 
unfolding distributed_distr_eq_density[OF Pxy, symmetric] 

632 
using distributed_measurable[OF Pxy] by (rule prob_space_distr) 

633 
show "prob_space (density (S \<Otimes>\<^isub>M T) (\<lambda>x. ereal (Px (fst x) * Py (snd x))))" 

634 
unfolding distr_eq[symmetric] by unfold_locales 

635 
qed 

636 
qed 

637 

638 

639 
lemma (in information_space) 

640 
fixes Pxy :: "'b \<times> 'c \<Rightarrow> real" and Px :: "'b \<Rightarrow> real" and Py :: "'c \<Rightarrow> real" 

47694  641 
assumes "sigma_finite_measure S" "sigma_finite_measure T" 
642 
assumes Px: "distributed M S X Px" and Py: "distributed M T Y Py" 

643 
assumes Pxy: "distributed M (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x)) Pxy" 

644 
defines "f \<equiv> \<lambda>x. Pxy x * log b (Pxy x / (Px (fst x) * Py (snd x)))" 

645 
shows mutual_information_distr: "mutual_information b S T X Y = integral\<^isup>L (S \<Otimes>\<^isub>M T) f" (is "?M = ?R") 

646 
and mutual_information_nonneg: "integrable (S \<Otimes>\<^isub>M T) f \<Longrightarrow> 0 \<le> mutual_information b S T X Y" 

40859  647 
proof  
47694  648 
have X: "random_variable S X" 
649 
using Px by (auto simp: distributed_def) 

650 
have Y: "random_variable T Y" 

651 
using Py by (auto simp: distributed_def) 

652 
interpret S: sigma_finite_measure S by fact 

653 
interpret T: sigma_finite_measure T by fact 

654 
interpret ST: pair_sigma_finite S T .. 

655 
interpret X: prob_space "distr M S X" using X by (rule prob_space_distr) 

656 
interpret Y: prob_space "distr M T Y" using Y by (rule prob_space_distr) 

657 
interpret XY: pair_prob_space "distr M S X" "distr M T Y" .. 

658 
let ?P = "S \<Otimes>\<^isub>M T" 

659 
let ?D = "distr M ?P (\<lambda>x. (X x, Y x))" 

660 

661 
{ fix A assume "A \<in> sets S" 

662 
with X Y have "emeasure (distr M S X) A = emeasure ?D (A \<times> space T)" 

663 
by (auto simp: emeasure_distr measurable_Pair measurable_space 

664 
intro!: arg_cong[where f="emeasure M"]) } 

665 
note marginal_eq1 = this 

666 
{ fix A assume "A \<in> sets T" 

667 
with X Y have "emeasure (distr M T Y) A = emeasure ?D (space S \<times> A)" 

668 
by (auto simp: emeasure_distr measurable_Pair measurable_space 

669 
intro!: arg_cong[where f="emeasure M"]) } 

670 
note marginal_eq2 = this 

671 

672 
have eq: "(\<lambda>x. ereal (Px (fst x) * Py (snd x))) = (\<lambda>(x, y). ereal (Px x) * ereal (Py y))" 

673 
by auto 

674 

675 
have distr_eq: "distr M S X \<Otimes>\<^isub>M distr M T Y = density ?P (\<lambda>x. ereal (Px (fst x) * Py (snd x)))" 

676 
unfolding Px(1)[THEN distributed_distr_eq_density] Py(1)[THEN distributed_distr_eq_density] eq 

677 
proof (subst pair_measure_density) 

678 
show "(\<lambda>x. ereal (Px x)) \<in> borel_measurable S" "(\<lambda>y. ereal (Py y)) \<in> borel_measurable T" 

679 
"AE x in S. 0 \<le> ereal (Px x)" "AE y in T. 0 \<le> ereal (Py y)" 

680 
using Px Py by (auto simp: distributed_def) 

681 
show "sigma_finite_measure (density T Py)" unfolding Py(1)[THEN distributed_distr_eq_density, symmetric] .. 

682 
qed (fact  simp)+ 

683 

684 
have M: "?M = KL_divergence b (density ?P (\<lambda>x. ereal (Px (fst x) * Py (snd x)))) (density ?P (\<lambda>x. ereal (Pxy x)))" 

685 
unfolding mutual_information_def distr_eq Pxy(1)[THEN distributed_distr_eq_density] .. 

686 

687 
from Px Py have f: "(\<lambda>x. Px (fst x) * Py (snd x)) \<in> borel_measurable ?P" 

688 
by (intro borel_measurable_times) (auto intro: distributed_real_measurable measurable_fst'' measurable_snd'') 

689 
have PxPy_nonneg: "AE x in ?P. 0 \<le> Px (fst x) * Py (snd x)" 

690 
proof (rule ST.AE_pair_measure) 

691 
show "{x \<in> space ?P. 0 \<le> Px (fst x) * Py (snd x)} \<in> sets ?P" 

692 
using f by auto 

693 
show "AE x in S. AE y in T. 0 \<le> Px (fst (x, y)) * Py (snd (x, y))" 

694 
using Px Py by (auto simp: zero_le_mult_iff dest!: distributed_real_AE) 

695 
qed 

696 

697 
have "(AE x in ?P. Px (fst x) = 0 \<longrightarrow> Pxy x = 0)" 

698 
by (rule subdensity_real[OF measurable_fst Pxy Px]) auto 

699 
moreover 

700 
have "(AE x in ?P. Py (snd x) = 0 \<longrightarrow> Pxy x = 0)" 

701 
by (rule subdensity_real[OF measurable_snd Pxy Py]) auto 

702 
ultimately have ac: "AE x in ?P. Px (fst x) * Py (snd x) = 0 \<longrightarrow> Pxy x = 0" 

703 
by eventually_elim auto 

704 

705 
show "?M = ?R" 

706 
unfolding M f_def 

707 
using b_gt_1 f PxPy_nonneg Pxy[THEN distributed_real_measurable] Pxy[THEN distributed_real_AE] ac 

708 
by (rule ST.KL_density_density) 

709 

710 
assume int: "integrable (S \<Otimes>\<^isub>M T) f" 

711 
show "0 \<le> ?M" unfolding M 

712 
proof (rule ST.KL_density_density_nonneg 

713 
[OF b_gt_1 f PxPy_nonneg _ Pxy[THEN distributed_real_measurable] Pxy[THEN distributed_real_AE] _ ac int[unfolded f_def]]) 

714 
show "prob_space (density (S \<Otimes>\<^isub>M T) (\<lambda>x. ereal (Pxy x))) " 

715 
unfolding distributed_distr_eq_density[OF Pxy, symmetric] 

716 
using distributed_measurable[OF Pxy] by (rule prob_space_distr) 

717 
show "prob_space (density (S \<Otimes>\<^isub>M T) (\<lambda>x. ereal (Px (fst x) * Py (snd x))))" 

718 
unfolding distr_eq[symmetric] by unfold_locales 

40859  719 
qed 
720 
qed 

721 

722 
lemma (in information_space) 

47694  723 
fixes Pxy :: "'b \<times> 'c \<Rightarrow> real" and Px :: "'b \<Rightarrow> real" and Py :: "'c \<Rightarrow> real" 
724 
assumes "sigma_finite_measure S" "sigma_finite_measure T" 

725 
assumes Px: "distributed M S X Px" and Py: "distributed M T Y Py" 

726 
assumes Pxy: "distributed M (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x)) Pxy" 

727 
assumes ae: "AE x in S. AE y in T. Pxy (x, y) = Px x * Py y" 

728 
shows mutual_information_eq_0: "mutual_information b S T X Y = 0" 

36624  729 
proof  
47694  730 
interpret S: sigma_finite_measure S by fact 
731 
interpret T: sigma_finite_measure T by fact 

732 
interpret ST: pair_sigma_finite S T .. 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

733 

47694  734 
have "AE x in S \<Otimes>\<^isub>M T. Px (fst x) = 0 \<longrightarrow> Pxy x = 0" 
735 
by (rule subdensity_real[OF measurable_fst Pxy Px]) auto 

736 
moreover 

737 
have "AE x in S \<Otimes>\<^isub>M T. Py (snd x) = 0 \<longrightarrow> Pxy x = 0" 

738 
by (rule subdensity_real[OF measurable_snd Pxy Py]) auto 

739 
moreover 

740 
have "AE x in S \<Otimes>\<^isub>M T. Pxy x = Px (fst x) * Py (snd x)" 

741 
using distributed_real_measurable[OF Px] distributed_real_measurable[OF Py] distributed_real_measurable[OF Pxy] 

742 
by (intro ST.AE_pair_measure) (auto simp: ae intro!: measurable_snd'' measurable_fst'') 

743 
ultimately have "AE x in S \<Otimes>\<^isub>M T. Pxy x * log b (Pxy x / (Px (fst x) * Py (snd x))) = 0" 

744 
by eventually_elim simp 

745 
then have "(\<integral>x. Pxy x * log b (Pxy x / (Px (fst x) * Py (snd x))) \<partial>(S \<Otimes>\<^isub>M T)) = (\<integral>x. 0 \<partial>(S \<Otimes>\<^isub>M T))" 

746 
by (rule integral_cong_AE) 

747 
then show ?thesis 

748 
by (subst mutual_information_distr[OF assms(15)]) simp 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

749 
qed 
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

750 

47694  751 
lemma (in information_space) mutual_information_simple_distributed: 
752 
assumes X: "simple_distributed M X Px" and Y: "simple_distributed M Y Py" 

753 
assumes XY: "simple_distributed M (\<lambda>x. (X x, Y x)) Pxy" 

754 
shows "\<I>(X ; Y) = (\<Sum>(x, y)\<in>(\<lambda>x. (X x, Y x))`space M. Pxy (x, y) * log b (Pxy (x, y) / (Px x * Py y)))" 

755 
proof (subst mutual_information_distr[OF _ _ simple_distributed[OF X] simple_distributed[OF Y] simple_distributed_joint[OF XY]]) 

756 
note fin = simple_distributed_joint_finite[OF XY, simp] 

757 
show "sigma_finite_measure (count_space (X ` space M))" 

758 
by (simp add: sigma_finite_measure_count_space_finite) 

759 
show "sigma_finite_measure (count_space (Y ` space M))" 

760 
by (simp add: sigma_finite_measure_count_space_finite) 

761 
let ?Pxy = "\<lambda>x. (if x \<in> (\<lambda>x. (X x, Y x)) ` space M then Pxy x else 0)" 

762 
let ?f = "\<lambda>x. ?Pxy x * log b (?Pxy x / (Px (fst x) * Py (snd x)))" 

763 
have "\<And>x. ?f x = (if x \<in> (\<lambda>x. (X x, Y x)) ` space M then Pxy x * log b (Pxy x / (Px (fst x) * Py (snd x))) else 0)" 

764 
by auto 

765 
with fin show "(\<integral> x. ?f x \<partial>(count_space (X ` space M) \<Otimes>\<^isub>M count_space (Y ` space M))) = 

766 
(\<Sum>(x, y)\<in>(\<lambda>x. (X x, Y x)) ` space M. Pxy (x, y) * log b (Pxy (x, y) / (Px x * Py y)))" 

767 
by (auto simp add: pair_measure_count_space lebesgue_integral_count_space_finite setsum_cases split_beta' 

768 
intro!: setsum_cong) 

769 
qed 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

770 

47694  771 
lemma (in information_space) 
772 
fixes Pxy :: "'b \<times> 'c \<Rightarrow> real" and Px :: "'b \<Rightarrow> real" and Py :: "'c \<Rightarrow> real" 

773 
assumes Px: "simple_distributed M X Px" and Py: "simple_distributed M Y Py" 

774 
assumes Pxy: "simple_distributed M (\<lambda>x. (X x, Y x)) Pxy" 

775 
assumes ae: "\<forall>x\<in>space M. Pxy (X x, Y x) = Px (X x) * Py (Y x)" 

776 
shows mutual_information_eq_0_simple: "\<I>(X ; Y) = 0" 

777 
proof (subst mutual_information_simple_distributed[OF Px Py Pxy]) 

778 
have "(\<Sum>(x, y)\<in>(\<lambda>x. (X x, Y x)) ` space M. Pxy (x, y) * log b (Pxy (x, y) / (Px x * Py y))) = 

779 
(\<Sum>(x, y)\<in>(\<lambda>x. (X x, Y x)) ` space M. 0)" 

780 
by (intro setsum_cong) (auto simp: ae) 

781 
then show "(\<Sum>(x, y)\<in>(\<lambda>x. (X x, Y x)) ` space M. 

782 
Pxy (x, y) * log b (Pxy (x, y) / (Px x * Py y))) = 0" by simp 

783 
qed 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

784 

39097  785 
subsection {* Entropy *} 
786 

47694  787 
definition (in prob_space) entropy :: "real \<Rightarrow> 'b measure \<Rightarrow> ('a \<Rightarrow> 'b) \<Rightarrow> real" where 
788 
"entropy b S X =  KL_divergence b S (distr M S X)" 

789 

40859  790 
abbreviation (in information_space) 
791 
entropy_Pow ("\<H>'(_')") where 

47694  792 
"\<H>(X) \<equiv> entropy b (count_space (X`space M)) X" 
41981
cdf7693bbe08
reworked Probability theory: measures are not type restricted to positive extended reals
hoelzl
parents:
41833
diff
changeset

793 

49791  794 
lemma (in prob_space) distributed_RN_deriv: 
795 
assumes X: "distributed M S X Px" 

796 
shows "AE x in S. RN_deriv S (density S Px) x = Px x" 

797 
proof  

798 
note D = distributed_measurable[OF X] distributed_borel_measurable[OF X] distributed_AE[OF X] 

799 
interpret X: prob_space "distr M S X" 

800 
using D(1) by (rule prob_space_distr) 

801 

802 
have sf: "sigma_finite_measure (distr M S X)" by default 

803 
show ?thesis 

804 
using D 

805 
apply (subst eq_commute) 

806 
apply (intro RN_deriv_unique_sigma_finite) 

807 
apply (auto intro: divide_nonneg_nonneg measure_nonneg 

808 
simp: distributed_distr_eq_density[symmetric, OF X] sf) 

809 
done 

810 
qed 

811 

49788
3c10763f5cb4
show and use distributed_swap and distributed_jointI
hoelzl
parents:
49787
diff
changeset

812 
lemma (in information_space) 
47694  813 
fixes X :: "'a \<Rightarrow> 'b" 
49785  814 
assumes X: "distributed M MX X f" 
49788
3c10763f5cb4
show and use distributed_swap and distributed_jointI
hoelzl
parents:
49787
diff
changeset

815 
shows entropy_distr: "entropy b MX X =  (\<integral>x. f x * log b (f x) \<partial>MX)" (is ?eq) 
3c10763f5cb4
show and use distributed_swap and distributed_jointI
hoelzl
parents:
49787
diff
changeset

816 
proof  
49785  817 
note D = distributed_measurable[OF X] distributed_borel_measurable[OF X] distributed_AE[OF X] 
49791  818 
note ae = distributed_RN_deriv[OF X] 
49788
3c10763f5cb4
show and use distributed_swap and distributed_jointI
hoelzl
parents:
49787
diff
changeset

819 

3c10763f5cb4
show and use distributed_swap and distributed_jointI
hoelzl
parents:
49787
diff
changeset

820 
have ae_eq: "AE x in distr M MX X. log b (real (RN_deriv MX (distr M MX X) x)) = 
49785  821 
log b (f x)" 
822 
unfolding distributed_distr_eq_density[OF X] 

823 
apply (subst AE_density) 

824 
using D apply simp 

825 
using ae apply eventually_elim 

826 
apply auto 

827 
done 

49788
3c10763f5cb4
show and use distributed_swap and distributed_jointI
hoelzl
parents:
49787
diff
changeset

828 

3c10763f5cb4
show and use distributed_swap and distributed_jointI
hoelzl
parents:
49787
diff
changeset

829 
have int_eq: " (\<integral> x. log b (f x) \<partial>distr M MX X) =  (\<integral> x. f x * log b (f x) \<partial>MX)" 
49785  830 
unfolding distributed_distr_eq_density[OF X] 
831 
using D 

832 
by (subst integral_density) 

833 
(auto simp: borel_measurable_ereal_iff) 

49788
3c10763f5cb4
show and use distributed_swap and distributed_jointI
hoelzl
parents:
49787
diff
changeset

834 

3c10763f5cb4
show and use distributed_swap and distributed_jointI
hoelzl
parents:
49787
diff
changeset

835 
show ?eq 
3c10763f5cb4
show and use distributed_swap and distributed_jointI
hoelzl
parents:
49787
diff
changeset

836 
unfolding entropy_def KL_divergence_def entropy_density_def comp_def 
3c10763f5cb4
show and use distributed_swap and distributed_jointI
hoelzl
parents:
49787
diff
changeset

837 
apply (subst integral_cong_AE) 
3c10763f5cb4
show and use distributed_swap and distributed_jointI
hoelzl
parents:
49787
diff
changeset

838 
apply (rule ae_eq) 
3c10763f5cb4
show and use distributed_swap and distributed_jointI
hoelzl
parents:
49787
diff
changeset

839 
apply (rule int_eq) 
3c10763f5cb4
show and use distributed_swap and distributed_jointI
hoelzl
parents:
49787
diff
changeset

840 
done 
3c10763f5cb4
show and use distributed_swap and distributed_jointI
hoelzl
parents:
49787
diff
changeset

841 
qed 
49785  842 

49786  843 
lemma (in prob_space) distributed_imp_emeasure_nonzero: 
844 
assumes X: "distributed M MX X Px" 

845 
shows "emeasure MX {x \<in> space MX. Px x \<noteq> 0} \<noteq> 0" 

846 
proof 

847 
note Px = distributed_borel_measurable[OF X] distributed_AE[OF X] 

848 
interpret X: prob_space "distr M MX X" 

849 
using distributed_measurable[OF X] by (rule prob_space_distr) 

850 

851 
assume "emeasure MX {x \<in> space MX. Px x \<noteq> 0} = 0" 

852 
with Px have "AE x in MX. Px x = 0" 

853 
by (intro AE_I[OF subset_refl]) (auto simp: borel_measurable_ereal_iff) 

854 
moreover 

855 
from X.emeasure_space_1 have "(\<integral>\<^isup>+x. Px x \<partial>MX) = 1" 

856 
unfolding distributed_distr_eq_density[OF X] using Px 

857 
by (subst (asm) emeasure_density) 

858 
(auto simp: borel_measurable_ereal_iff intro!: integral_cong cong: positive_integral_cong) 

859 
ultimately show False 

860 
by (simp add: positive_integral_cong_AE) 

861 
qed 

862 

863 
lemma (in information_space) entropy_le: 

864 
fixes Px :: "'b \<Rightarrow> real" and MX :: "'b measure" 

865 
assumes X: "distributed M MX X Px" 

866 
and fin: "emeasure MX {x \<in> space MX. Px x \<noteq> 0} \<noteq> \<infinity>" 

867 
and int: "integrable MX (\<lambda>x.  Px x * log b (Px x))" 

868 
shows "entropy b MX X \<le> log b (measure MX {x \<in> space MX. Px x \<noteq> 0})" 

869 
proof  

870 
note Px = distributed_borel_measurable[OF X] distributed_AE[OF X] 

871 
interpret X: prob_space "distr M MX X" 

872 
using distributed_measurable[OF X] by (rule prob_space_distr) 

873 

874 
have "  log b (measure MX {x \<in> space MX. Px x \<noteq> 0}) = 

875 
 log b (\<integral> x. indicator {x \<in> space MX. Px x \<noteq> 0} x \<partial>MX)" 

876 
using Px fin 

877 
by (subst integral_indicator) (auto simp: measure_def borel_measurable_ereal_iff) 

878 
also have " log b (\<integral> x. indicator {x \<in> space MX. Px x \<noteq> 0} x \<partial>MX) =  log b (\<integral> x. 1 / Px x \<partial>distr M MX X)" 

879 
unfolding distributed_distr_eq_density[OF X] using Px 

880 
apply (intro arg_cong[where f="log b"] arg_cong[where f=uminus]) 

881 
by (subst integral_density) (auto simp: borel_measurable_ereal_iff intro!: integral_cong) 

882 
also have "\<dots> \<le> (\<integral> x.  log b (1 / Px x) \<partial>distr M MX X)" 

883 
proof (rule X.jensens_inequality[of "\<lambda>x. 1 / Px x" "{0<..}" 0 1 "\<lambda>x.  log b x"]) 

884 
show "AE x in distr M MX X. 1 / Px x \<in> {0<..}" 

885 
unfolding distributed_distr_eq_density[OF X] 

886 
using Px by (auto simp: AE_density) 

887 
have [simp]: "\<And>x. x \<in> space MX \<Longrightarrow> ereal (if Px x = 0 then 0 else 1) = indicator {x \<in> space MX. Px x \<noteq> 0} x" 

888 
by (auto simp: one_ereal_def) 

889 
have "(\<integral>\<^isup>+ x. max 0 (ereal ( (if Px x = 0 then 0 else 1))) \<partial>MX) = (\<integral>\<^isup>+ x. 0 \<partial>MX)" 

890 
by (intro positive_integral_cong) (auto split: split_max) 

891 
then show "integrable (distr M MX X) (\<lambda>x. 1 / Px x)" 

892 
unfolding distributed_distr_eq_density[OF X] using Px 

893 
by (auto simp: positive_integral_density integrable_def borel_measurable_ereal_iff fin positive_integral_max_0 

894 
cong: positive_integral_cong) 

895 
have "integrable MX (\<lambda>x. Px x * log b (1 / Px x)) = 

896 
integrable MX (\<lambda>x.  Px x * log b (Px x))" 

897 
using Px 

898 
by (intro integrable_cong_AE) 

899 
(auto simp: borel_measurable_ereal_iff log_divide_eq 

900 
intro!: measurable_If) 

901 
then show "integrable (distr M MX X) (\<lambda>x.  log b (1 / Px x))" 

902 
unfolding distributed_distr_eq_density[OF X] 

903 
using Px int 

904 
by (subst integral_density) (auto simp: borel_measurable_ereal_iff) 

905 
qed (auto simp: minus_log_convex[OF b_gt_1]) 

906 
also have "\<dots> = (\<integral> x. log b (Px x) \<partial>distr M MX X)" 

907 
unfolding distributed_distr_eq_density[OF X] using Px 

908 
by (intro integral_cong_AE) (auto simp: AE_density log_divide_eq) 

909 
also have "\<dots> =  entropy b MX X" 

910 
unfolding distributed_distr_eq_density[OF X] using Px 

911 
by (subst entropy_distr[OF X]) (auto simp: borel_measurable_ereal_iff integral_density) 

912 
finally show ?thesis 

913 
by simp 

914 
qed 

915 

916 
lemma (in information_space) entropy_le_space: 

917 
fixes Px :: "'b \<Rightarrow> real" and MX :: "'b measure" 

918 
assumes X: "distributed M MX X Px" 

919 
and fin: "finite_measure MX" 

920 
and int: "integrable MX (\<lambda>x.  Px x * log b (Px x))" 

921 
shows "entropy b MX X \<le> log b (measure MX (space MX))" 

922 
proof  

923 
note Px = distributed_borel_measurable[OF X] distributed_AE[OF X] 

924 
interpret finite_measure MX by fact 

925 
have "entropy b MX X \<le> log b (measure MX {x \<in> space MX. Px x \<noteq> 0})" 

926 
using int X by (intro entropy_le) auto 

927 
also have "\<dots> \<le> log b (measure MX (space MX))" 

928 
using Px distributed_imp_emeasure_nonzero[OF X] 

929 
by (intro log_le) 

930 
(auto intro!: borel_measurable_ereal_iff finite_measure_mono b_gt_1 

931 
less_le[THEN iffD2] measure_nonneg simp: emeasure_eq_measure) 

932 
finally show ?thesis . 

933 
qed 

934 

47694  935 
lemma (in information_space) entropy_uniform: 
49785  936 
assumes X: "distributed M MX X (\<lambda>x. indicator A x / measure MX A)" (is "distributed _ _ _ ?f") 
47694  937 
shows "entropy b MX X = log b (measure MX A)" 
49785  938 
proof (subst entropy_distr[OF X]) 
939 
have [simp]: "emeasure MX A \<noteq> \<infinity>" 

940 
using uniform_distributed_params[OF X] by (auto simp add: measure_def) 

941 
have eq: "(\<integral> x. indicator A x / measure MX A * log b (indicator A x / measure MX A) \<partial>MX) = 

942 
(\<integral> x. ( log b (measure MX A) / measure MX A) * indicator A x \<partial>MX)" 

943 
using measure_nonneg[of MX A] uniform_distributed_params[OF X] 

944 
by (auto intro!: integral_cong split: split_indicator simp: log_divide_eq) 

945 
show " (\<integral> x. indicator A x / measure MX A * log b (indicator A x / measure MX A) \<partial>MX) = 

946 
log b (measure MX A)" 

947 
unfolding eq using uniform_distributed_params[OF X] 

948 
by (subst lebesgue_integral_cmult) (auto simp: measure_def) 

949 
qed 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

950 

47694  951 
lemma (in information_space) entropy_simple_distributed: 
49786  952 
"simple_distributed M X f \<Longrightarrow> \<H>(X) =  (\<Sum>x\<in>X`space M. f x * log b (f x))" 
953 
by (subst entropy_distr[OF simple_distributed]) 

954 
(auto simp add: lebesgue_integral_count_space_finite) 

39097  955 

40859  956 
lemma (in information_space) entropy_le_card_not_0: 
47694  957 
assumes X: "simple_distributed M X f" 
958 
shows "\<H>(X) \<le> log b (card (X ` space M \<inter> {x. f x \<noteq> 0}))" 

39097  959 
proof  
49787
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

960 
let ?X = "count_space (X`space M)" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

961 
have "\<H>(X) \<le> log b (measure ?X {x \<in> space ?X. f x \<noteq> 0})" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

962 
by (rule entropy_le[OF simple_distributed[OF X]]) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

963 
(simp_all add: simple_distributed_finite[OF X] subset_eq integrable_count_space emeasure_count_space) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

964 
also have "measure ?X {x \<in> space ?X. f x \<noteq> 0} = card (X ` space M \<inter> {x. f x \<noteq> 0})" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

965 
by (simp_all add: simple_distributed_finite[OF X] subset_eq emeasure_count_space measure_def Int_def) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

966 
finally show ?thesis . 
39097  967 
qed 
968 

40859  969 
lemma (in information_space) entropy_le_card: 
49787
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

970 
assumes X: "simple_distributed M X f" 
40859  971 
shows "\<H>(X) \<le> log b (real (card (X ` space M)))" 
49787
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

972 
proof  
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

973 
let ?X = "count_space (X`space M)" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

974 
have "\<H>(X) \<le> log b (measure ?X (space ?X))" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

975 
by (rule entropy_le_space[OF simple_distributed[OF X]]) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

976 
(simp_all add: simple_distributed_finite[OF X] subset_eq integrable_count_space emeasure_count_space finite_measure_count_space) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

977 
also have "measure ?X (space ?X) = card (X ` space M)" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

978 
by (simp_all add: simple_distributed_finite[OF X] subset_eq emeasure_count_space measure_def) 
39097  979 
finally show ?thesis . 
980 
qed 

981 

982 
subsection {* Conditional Mutual Information *} 

983 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

984 
definition (in prob_space) 
41689
3e39b0e730d6
the measure valuation is again part of the measure_space type, instead of an explicit parameter to the locale;
hoelzl
parents:
41661
diff
changeset

985 
"conditional_mutual_information b MX MY MZ X Y Z \<equiv> 
3e39b0e730d6
the measure valuation is again part of the measure_space type, instead of an explicit parameter to the locale;
hoelzl
parents:
41661
diff
changeset

986 
mutual_information b MX (MY \<Otimes>\<^isub>M MZ) X (\<lambda>x. (Y x, Z x))  
3e39b0e730d6
the measure valuation is again part of the measure_space type, instead of an explicit parameter to the locale;
hoelzl
parents:
41661
diff
changeset

987 
mutual_information b MX MZ X Z" 
36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

988 

40859  989 
abbreviation (in information_space) 
990 
conditional_mutual_information_Pow ("\<I>'( _ ; _  _ ')") where 

36624  991 
"\<I>(X ; Y  Z) \<equiv> conditional_mutual_information b 
47694  992 
(count_space (X ` space M)) (count_space (Y ` space M)) (count_space (Z ` space M)) X Y Z" 
36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

993 

49787
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

994 
lemma (in information_space) 
47694  995 
assumes S: "sigma_finite_measure S" and T: "sigma_finite_measure T" and P: "sigma_finite_measure P" 
50003  996 
assumes Px[measurable]: "distributed M S X Px" 
997 
assumes Pz[measurable]: "distributed M P Z Pz" 

998 
assumes Pyz[measurable]: "distributed M (T \<Otimes>\<^isub>M P) (\<lambda>x. (Y x, Z x)) Pyz" 

999 
assumes Pxz[measurable]: "distributed M (S \<Otimes>\<^isub>M P) (\<lambda>x. (X x, Z x)) Pxz" 

1000 
assumes Pxyz[measurable]: "distributed M (S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P) (\<lambda>x. (X x, Y x, Z x)) Pxyz" 

47694  1001 
assumes I1: "integrable (S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P) (\<lambda>(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Px x * Pyz (y, z))))" 
1002 
assumes I2: "integrable (S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P) (\<lambda>(x, y, z). Pxyz (x, y, z) * log b (Pxz (x, z) / (Px x * Pz z)))" 

49787
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1003 
shows conditional_mutual_information_generic_eq: "conditional_mutual_information b S T P X Y Z 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1004 
= (\<integral>(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Pxz (x, z) * (Pyz (y,z) / Pz z))) \<partial>(S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P))" (is "?eq") 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1005 
and conditional_mutual_information_generic_nonneg: "0 \<le> conditional_mutual_information b S T P X Y Z" (is "?nonneg") 
40859  1006 
proof  
47694  1007 
interpret S: sigma_finite_measure S by fact 
1008 
interpret T: sigma_finite_measure T by fact 

1009 
interpret P: sigma_finite_measure P by fact 

1010 
interpret TP: pair_sigma_finite T P .. 

1011 
interpret SP: pair_sigma_finite S P .. 

49787
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1012 
interpret ST: pair_sigma_finite S T .. 
47694  1013 
interpret SPT: pair_sigma_finite "S \<Otimes>\<^isub>M P" T .. 
1014 
interpret STP: pair_sigma_finite S "T \<Otimes>\<^isub>M P" .. 

49787
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1015 
interpret TPS: pair_sigma_finite "T \<Otimes>\<^isub>M P" S .. 
47694  1016 
have TP: "sigma_finite_measure (T \<Otimes>\<^isub>M P)" .. 
1017 
have SP: "sigma_finite_measure (S \<Otimes>\<^isub>M P)" .. 

1018 
have YZ: "random_variable (T \<Otimes>\<^isub>M P) (\<lambda>x. (Y x, Z x))" 

1019 
using Pyz by (simp add: distributed_measurable) 

1020 

1021 
from Pxz Pxyz have distr_eq: "distr M (S \<Otimes>\<^isub>M P) (\<lambda>x. (X x, Z x)) = 

1022 
distr (distr M (S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P) (\<lambda>x. (X x, Y x, Z x))) (S \<Otimes>\<^isub>M P) (\<lambda>(x, y, z). (x, z))" 

50003  1023 
by (simp add: comp_def distr_distr) 
40859  1024 

47694  1025 
have "mutual_information b S P X Z = 
1026 
(\<integral>x. Pxz x * log b (Pxz x / (Px (fst x) * Pz (snd x))) \<partial>(S \<Otimes>\<^isub>M P))" 

1027 
by (rule mutual_information_distr[OF S P Px Pz Pxz]) 

1028 
also have "\<dots> = (\<integral>(x,y,z). Pxyz (x,y,z) * log b (Pxz (x,z) / (Px x * Pz z)) \<partial>(S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P))" 

1029 
using b_gt_1 Pxz Px Pz 

50003  1030 
by (subst distributed_transform_integral[OF Pxyz Pxz, where T="\<lambda>(x, y, z). (x, z)"]) (auto simp: split_beta') 
47694  1031 
finally have mi_eq: 
1032 
"mutual_information b S P X Z = (\<integral>(x,y,z). Pxyz (x,y,z) * log b (Pxz (x,z) / (Px x * Pz z)) \<partial>(S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P))" . 

1033 

49787
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1034 
have ae1: "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. Px (fst x) = 0 \<longrightarrow> Pxyz x = 0" 
47694  1035 
by (intro subdensity_real[of fst, OF _ Pxyz Px]) auto 
49787
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1036 
moreover have ae2: "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. Pz (snd (snd x)) = 0 \<longrightarrow> Pxyz x = 0" 
50003  1037 
by (intro subdensity_real[of "\<lambda>x. snd (snd x)", OF _ Pxyz Pz]) auto 
49787
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1038 
moreover have ae3: "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. Pxz (fst x, snd (snd x)) = 0 \<longrightarrow> Pxyz x = 0" 
50003  1039 
by (intro subdensity_real[of "\<lambda>x. (fst x, snd (snd x))", OF _ Pxyz Pxz]) auto 
49787
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1040 
moreover have ae4: "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. Pyz (snd x) = 0 \<longrightarrow> Pxyz x = 0" 
50003  1041 
by (intro subdensity_real[of snd, OF _ Pxyz Pyz]) auto 
49787
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1042 
moreover have ae5: "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. 0 \<le> Px (fst x)" 
50003  1043 
using Px by (intro STP.AE_pair_measure) (auto simp: comp_def dest: distributed_real_AE) 
49787
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1044 
moreover have ae6: "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. 0 \<le> Pyz (snd x)" 
50003  1045 
using Pyz by (intro STP.AE_pair_measure) (auto simp: comp_def dest: distributed_real_AE) 
49787
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1046 
moreover have ae7: "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. 0 \<le> Pz (snd (snd x))" 
50003  1047 
using Pz Pz[THEN distributed_real_measurable] 
1048 
by (auto intro!: TP.AE_pair_measure STP.AE_pair_measure AE_I2[of S] dest: distributed_real_AE) 

49787
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1049 
moreover have ae8: "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. 0 \<le> Pxz (fst x, snd (snd x))" 
47694  1050 
using Pxz[THEN distributed_real_AE, THEN SP.AE_pair] 
50003  1051 
by (auto intro!: TP.AE_pair_measure STP.AE_pair_measure) 
47694  1052 
moreover note Pxyz[THEN distributed_real_AE] 
49787
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1053 
ultimately have ae: "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. 
47694  1054 
Pxyz x * log b (Pxyz x / (Px (fst x) * Pyz (snd x)))  
1055 
Pxyz x * log b (Pxz (fst x, snd (snd x)) / (Px (fst x) * Pz (snd (snd x)))) = 

1056 
Pxyz x * log b (Pxyz x * Pz (snd (snd x)) / (Pxz (fst x, snd (snd x)) * Pyz (snd x))) " 

1057 
proof eventually_elim 

1058 
case (goal1 x) 

1059 
show ?case 

40859  1060 
proof cases 
47694  1061 
assume "Pxyz x \<noteq> 0" 
1062 
with goal1 have "0 < Px (fst x)" "0 < Pz (snd (snd x))" "0 < Pxz (fst x, snd (snd x))" "0 < Pyz (snd x)" "0 < Pxyz x" 

1063 
by auto 

1064 
then show ?thesis 

1065 
using b_gt_1 by (simp add: log_simps mult_pos_pos less_imp_le field_simps) 

40859  1066 
qed simp 
1067 
qed 

49787
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1068 
with I1 I2 show ?eq 
40859  1069 
unfolding conditional_mutual_information_def 
47694  1070 
apply (subst mi_eq) 
1071 
apply (subst mutual_information_distr[OF S TP Px Pyz Pxyz]) 

1072 
apply (subst integral_diff(2)[symmetric]) 

1073 
apply (auto intro!: integral_cong_AE simp: split_beta' simp del: integral_diff) 

1074 
done 

49787
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1075 

d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1076 
let ?P = "density (S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P) Pxyz" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1077 
interpret P: prob_space ?P 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1078 
unfolding distributed_distr_eq_density[OF Pxyz, symmetric] 
50003  1079 
by (rule prob_space_distr) simp 
49787
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1080 

d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1081 
let ?Q = "density (T \<Otimes>\<^isub>M P) Pyz" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1082 
interpret Q: prob_space ?Q 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1083 
unfolding distributed_distr_eq_density[OF Pyz, symmetric] 
50003  1084 
by (rule prob_space_distr) simp 
49787
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1085 

d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1086 
let ?f = "\<lambda>(x, y, z). Pxz (x, z) * (Pyz (y, z) / Pz z) / Pxyz (x, y, z)" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1087 

d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1088 
from subdensity_real[of snd, OF _ Pyz Pz] 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1089 
have aeX1: "AE x in T \<Otimes>\<^isub>M P. Pz (snd x) = 0 \<longrightarrow> Pyz x = 0" by (auto simp: comp_def) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1090 
have aeX2: "AE x in T \<Otimes>\<^isub>M P. 0 \<le> Pz (snd x)" 
50003  1091 
using Pz by (intro TP.AE_pair_measure) (auto simp: comp_def dest: distributed_real_AE) 
49787
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1092 

d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1093 
have aeX3: "AE y in T \<Otimes>\<^isub>M P. (\<integral>\<^isup>+ x. ereal (Pxz (x, snd y)) \<partial>S) = ereal (Pz (snd y))" 
49788
3c10763f5cb4
show and use distributed_swap and distributed_jointI
hoelzl
parents:
49787
diff
changeset

1094 
using Pz distributed_marginal_eq_joint2[OF P S Pz Pxz] 
50003  1095 
by (intro TP.AE_pair_measure) (auto dest: distributed_real_AE) 
49787
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1096 

d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1097 
have "(\<integral>\<^isup>+ x. ?f x \<partial>?P) \<le> (\<integral>\<^isup>+ (x, y, z). Pxz (x, z) * (Pyz (y, z) / Pz z) \<partial>(S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P))" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1098 
apply (subst positive_integral_density) 
50003  1099 
apply simp 
49787
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1100 
apply (rule distributed_AE[OF Pxyz]) 
50003  1101 
apply auto [] 
49787
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1102 
apply (rule positive_integral_mono_AE) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1103 
using ae5 ae6 ae7 ae8 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1104 
apply eventually_elim 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1105 
apply (auto intro!: divide_nonneg_nonneg mult_nonneg_nonneg) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1106 
done 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1107 
also have "\<dots> = (\<integral>\<^isup>+(y, z). \<integral>\<^isup>+ x. ereal (Pxz (x, z)) * ereal (Pyz (y, z) / Pz z) \<partial>S \<partial>T \<Otimes>\<^isub>M P)" 
50003  1108 
by (subst STP.positive_integral_snd_measurable[symmetric]) (auto simp add: split_beta') 
49787
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1109 
also have "\<dots> = (\<integral>\<^isup>+x. ereal (Pyz x) * 1 \<partial>T \<Otimes>\<^isub>M P)" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1110 
apply (rule positive_integral_cong_AE) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1111 
using aeX1 aeX2 aeX3 distributed_AE[OF Pyz] AE_space 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1112 
apply eventually_elim 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1113 
proof (case_tac x, simp del: times_ereal.simps add: space_pair_measure) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1114 
fix a b assume "Pz b = 0 \<longrightarrow> Pyz (a, b) = 0" "0 \<le> Pz b" "a \<in> space T \<and> b \<in> space P" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1115 
"(\<integral>\<^isup>+ x. ereal (Pxz (x, b)) \<partial>S) = ereal (Pz b)" "0 \<le> Pyz (a, b)" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1116 
then show "(\<integral>\<^isup>+ x. ereal (Pxz (x, b)) * ereal (Pyz (a, b) / Pz b) \<partial>S) = ereal (Pyz (a, b))" 
50003  1117 
by (subst positive_integral_multc) 
1118 
(auto intro!: divide_nonneg_nonneg split: prod.split) 

49787
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1119 
qed 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1120 
also have "\<dots> = 1" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1121 
using Q.emeasure_space_1 distributed_AE[OF Pyz] distributed_distr_eq_density[OF Pyz] 
50003  1122 
by (subst positive_integral_density[symmetric]) auto 
49787
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1123 
finally have le1: "(\<integral>\<^isup>+ x. ?f x \<partial>?P) \<le> 1" . 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1124 
also have "\<dots> < \<infinity>" by simp 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1125 
finally have fin: "(\<integral>\<^isup>+ x. ?f x \<partial>?P) \<noteq> \<infinity>" by simp 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1126 

d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1127 
have pos: "(\<integral>\<^isup>+ x. ?f x \<partial>?P) \<noteq> 0" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1128 
apply (subst positive_integral_density) 
50003  1129 
apply simp 
49787
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1130 
apply (rule distributed_AE[OF Pxyz]) 
50003  1131 
apply auto [] 
49787
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1132 
apply (simp add: split_beta') 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1133 
proof 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1134 
let ?g = "\<lambda>x. ereal (if Pxyz x = 0 then 0 else Pxz (fst x, snd (snd x)) * Pyz (snd x) / Pz (snd (snd x)))" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1135 
assume "(\<integral>\<^isup>+ x. ?g x \<partial>(S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P)) = 0" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1136 
then have "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. ?g x \<le> 0" 
50003  1137 
by (intro positive_integral_0_iff_AE[THEN iffD1]) auto 
49787
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1138 
then have "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. Pxyz x = 0" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1139 
using ae1 ae2 ae3 ae4 ae5 ae6 ae7 ae8 Pxyz[THEN distributed_real_AE] 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1140 
by eventually_elim (auto split: split_if_asm simp: mult_le_0_iff divide_le_0_iff) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1141 
then have "(\<integral>\<^isup>+ x. ereal (Pxyz x) \<partial>S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P) = 0" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1142 
by (subst positive_integral_cong_AE[of _ "\<lambda>x. 0"]) auto 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1143 
with P.emeasure_space_1 show False 
50003  1144 
by (subst (asm) emeasure_density) (auto cong: positive_integral_cong) 
49787
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1145 
qed 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1146 

d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1147 
have neg: "(\<integral>\<^isup>+ x.  ?f x \<partial>?P) = 0" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1148 
apply (rule positive_integral_0_iff_AE[THEN iffD2]) 
50003  1149 
apply simp 
49787
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1150 
apply (subst AE_density) 
50003  1151 
apply simp 
49787
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1152 
using ae5 ae6 ae7 ae8 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1153 
apply eventually_elim 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1154 
apply (auto intro!: mult_nonneg_nonneg divide_nonneg_nonneg) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1155 
done 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1156 

d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1157 
have I3: "integrable (S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P) (\<lambda>(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Pxz (x, z) * (Pyz (y,z) / Pz z))))" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1158 
apply (rule integrable_cong_AE[THEN iffD1, OF _ _ _ integral_diff(1)[OF I1 I2]]) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1159 
using ae 
50003  1160 
apply (auto simp: split_beta') 
49787
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1161 
done 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1162 

d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1163 
have " log b 1 \<le>  log b (integral\<^isup>L ?P ?f)" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1164 
proof (intro le_imp_neg_le log_le[OF b_gt_1]) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1165 
show "0 < integral\<^isup>L ?P ?f" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1166 
using neg pos fin positive_integral_positive[of ?P ?f] 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1167 
by (cases "(\<integral>\<^isup>+ x. ?f x \<partial>?P)") (auto simp add: lebesgue_integral_def less_le split_beta') 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1168 
show "integral\<^isup>L ?P ?f \<le> 1" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1169 
using neg le1 fin positive_integral_positive[of ?P ?f] 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1170 
by (cases "(\<integral>\<^isup>+ x. ?f x \<partial>?P)") (auto simp add: lebesgue_integral_def split_beta' one_ereal_def) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1171 
qed 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1172 
also have " log b (integral\<^isup>L ?P ?f) \<le> (\<integral> x.  log b (?f x) \<partial>?P)" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1173 
proof (rule P.jensens_inequality[where a=0 and b=1 and I="{0<..}"]) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1174 
show "AE x in ?P. ?f x \<in> {0<..}" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1175 
unfolding AE_density[OF distributed_borel_measurable[OF Pxyz]] 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1176 
using ae1 ae2 ae3 ae4 ae5 ae6 ae7 ae8 Pxyz[THEN distributed_real_AE] 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1177 
by eventually_elim (auto simp: divide_pos_pos mult_pos_pos) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1178 
show "integrable ?P ?f" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1179 
unfolding integrable_def 
50003  1180 
using fin neg by (auto simp: split_beta') 
49787
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1181 
show "integrable ?P (\<lambda>x.  log b (?f x))" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1182 
apply (subst integral_density) 
50003  1183 
apply simp 
1184 
apply (auto intro!: distributed_real_AE[OF Pxyz]) [] 

1185 
apply simp 

49787
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1186 
apply (rule integrable_cong_AE[THEN iffD1, OF _ _ _ I3]) 
50003  1187 
apply simp 
1188 
apply simp 

49787
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1189 
using ae1 ae2 ae3 ae4 ae5 ae6 ae7 ae8 Pxyz[THEN distributed_real_AE] 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1190 
apply eventually_elim 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1191 
apply (auto simp: log_divide_eq log_mult_eq zero_le_mult_iff zero_less_mult_iff zero_less_divide_iff field_simps) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1192 
done 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1193 
qed (auto simp: b_gt_1 minus_log_convex) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1194 
also have "\<dots> = conditional_mutual_information b S T P X Y Z" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1195 
unfolding `?eq` 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1196 
apply (subst integral_density) 
50003  1197 
apply simp 
1198 
apply (auto intro!: distributed_real_AE[OF Pxyz]) [] 

1199 
apply simp 

49787
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1200 
apply (intro integral_cong_AE) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1201 
using ae1 ae2 ae3 ae4 ae5 ae6 ae7 ae8 Pxyz[THEN distributed_real_AE] 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1202 
apply eventually_elim 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1203 
apply (auto simp: log_divide_eq zero_less_mult_iff zero_less_divide_iff field_simps) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1204 
done 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1205 
finally show ?nonneg 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1206 
by simp 
40859  1207 
qed 
1208 

49803  1209 
lemma (in information_space) 
1210 
fixes Px :: "_ \<Rightarrow> real" 

1211 
assumes S: "sigma_finite_measure S" and T: "sigma_finite_measure T" and P: "sigma_finite_measure P" 

1212 
assumes Fx: "finite_entropy S X Px" 

1213 
assumes Fz: "finite_entropy P Z Pz" 

1214 
assumes Fyz: "finite_entropy (T \<Otimes>\<^isub>M P) (\<lambda>x. (Y x, Z x)) Pyz" 

1215 
assumes Fxz: "finite_entropy (S \<Otimes>\<^isub>M P) (\<lambda>x. (X x, Z x)) Pxz" 

1216 
assumes Fxyz: "finite_entropy (S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P) (\<lambda>x. (X x, Y x, Z x)) Pxyz" 

1217 
shows conditional_mutual_information_generic_eq': "conditional_mutual_information b S T P X Y Z 

1218 
= (\<integral>(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Pxz (x, z) * (Pyz (y,z) / Pz z))) \<partial>(S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P))" (is "?eq") 

1219 
and conditional_mutual_information_generic_nonneg': "0 \<le> conditional_mutual_information b S T P X Y Z" (is "?nonneg") 

1220 
proof  

50003  1221 
note Px = Fx[THEN finite_entropy_distributed, measurable] 
1222 
note Pz = Fz[THEN finite_entropy_distributed, measurable] 

1223 
note Pyz = Fyz[THEN finite_entropy_distributed, measurable] 

1224 
note Pxz = Fxz[THEN finite_entropy_distributed, measurable] 

1225 
note Pxyz = Fxyz[THEN finite_entropy_distributed, measurable] 

49803  1226 

1227 
interpret S: sigma_finite_measure S by fact 

1228 
interpret T: sigma_finite_measure T by fact 

1229 
interpret P: sigma_finite_measure P by fact 

1230 
interpret TP: pair_sigma_finite T P .. 

1231 
interpret SP: pair_sigma_finite S P .. 

1232 
interpret ST: pair_sigma_finite S T .. 

1233 
interpret SPT: pair_sigma_finite "S \<Otimes>\<^isub>M P" T .. 

1234 
interpret STP: pair_sigma_finite S "T \<Otimes>\<^isub>M P" .. 

1235 
interpret TPS: pair_sigma_finite "T \<Otimes>\<^isub>M P" S .. 

