author  hoelzl 
Wed, 10 Oct 2012 12:12:29 +0200  
changeset 49792  43f49922811d 
parent 49791  e0854abfb3fc 
child 49802  dd8dffaf84b9 
permissions  rwrr 
42067  1 
(* Title: HOL/Probability/Information.thy 
2 
Author: Johannes Hölzl, TU München 

3 
Author: Armin Heller, TU München 

4 
*) 

5 

6 
header {*Information theory*} 

7 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

8 
theory Information 
41413
64cd30d6b0b8
explicit file specifications  avoid secondary load path;
wenzelm
parents:
41095
diff
changeset

9 
imports 
43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

10 
Independent_Family 
43556
0d78c8d31d0d
move conditional expectation to its own theory file
hoelzl
parents:
43340
diff
changeset

11 
Radon_Nikodym 
41413
64cd30d6b0b8
explicit file specifications  avoid secondary load path;
wenzelm
parents:
41095
diff
changeset

12 
"~~/src/HOL/Library/Convex" 
36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

13 
begin 
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

14 

39097  15 
lemma log_le: "1 < a \<Longrightarrow> 0 < x \<Longrightarrow> x \<le> y \<Longrightarrow> log a x \<le> log a y" 
16 
by (subst log_le_cancel_iff) auto 

17 

18 
lemma log_less: "1 < a \<Longrightarrow> 0 < x \<Longrightarrow> x < y \<Longrightarrow> log a x < log a y" 

19 
by (subst log_less_cancel_iff) auto 

20 

21 
lemma setsum_cartesian_product': 

22 
"(\<Sum>x\<in>A \<times> B. f x) = (\<Sum>x\<in>A. setsum (\<lambda>y. f (x, y)) B)" 

23 
unfolding setsum_cartesian_product by simp 

24 

25 
lemma split_pairs: 

40859  26 
"((A, B) = X) \<longleftrightarrow> (fst X = A \<and> snd X = B)" and 
27 
"(X = (A, B)) \<longleftrightarrow> (fst X = A \<and> snd X = B)" by auto 

38656  28 

29 
section "Information theory" 

30 

40859  31 
locale information_space = prob_space + 
38656  32 
fixes b :: real assumes b_gt_1: "1 < b" 
33 

40859  34 
context information_space 
38656  35 
begin 
36 

40859  37 
text {* Introduce some simplification rules for logarithm of base @{term b}. *} 
38 

39 
lemma log_neg_const: 

40 
assumes "x \<le> 0" 

41 
shows "log b x = log b 0" 

36624  42 
proof  
40859  43 
{ fix u :: real 
44 
have "x \<le> 0" by fact 

45 
also have "0 < exp u" 

46 
using exp_gt_zero . 

47 
finally have "exp u \<noteq> x" 

48 
by auto } 

49 
then show "log b x = log b 0" 

50 
by (simp add: log_def ln_def) 

38656  51 
qed 
52 

40859  53 
lemma log_mult_eq: 
54 
"log b (A * B) = (if 0 < A * B then log b \<bar>A\<bar> + log b \<bar>B\<bar> else log b 0)" 

55 
using log_mult[of b "\<bar>A\<bar>" "\<bar>B\<bar>"] b_gt_1 log_neg_const[of "A * B"] 

56 
by (auto simp: zero_less_mult_iff mult_le_0_iff) 

38656  57 

40859  58 
lemma log_inverse_eq: 
59 
"log b (inverse B) = (if 0 < B then  log b B else log b 0)" 

60 
using log_inverse[of b B] log_neg_const[of "inverse B"] b_gt_1 by simp 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

61 

40859  62 
lemma log_divide_eq: 
63 
"log b (A / B) = (if 0 < A * B then log b \<bar>A\<bar>  log b \<bar>B\<bar> else log b 0)" 

64 
unfolding divide_inverse log_mult_eq log_inverse_eq abs_inverse 

65 
by (auto simp: zero_less_mult_iff mult_le_0_iff) 

38656  66 

40859  67 
lemmas log_simps = log_mult_eq log_inverse_eq log_divide_eq 
38656  68 

69 
end 

70 

39097  71 
subsection "Kullback$$Leibler divergence" 
36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

72 

39097  73 
text {* The Kullback$$Leibler divergence is also known as relative entropy or 
74 
Kullback$$Leibler distance. *} 

75 

76 
definition 

47694  77 
"entropy_density b M N = log b \<circ> real \<circ> RN_deriv M N" 
43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

78 

60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

79 
definition 
47694  80 
"KL_divergence b M N = integral\<^isup>L N (entropy_density b M N)" 
43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

81 

60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

82 
lemma (in information_space) measurable_entropy_density: 
47694  83 
assumes ac: "absolutely_continuous M N" "sets N = events" 
84 
shows "entropy_density b M N \<in> borel_measurable M" 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

85 
proof  
47694  86 
from borel_measurable_RN_deriv[OF ac] b_gt_1 show ?thesis 
43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

87 
unfolding entropy_density_def 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

88 
by (intro measurable_comp) auto 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

89 
qed 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

90 

47694  91 
lemma (in sigma_finite_measure) KL_density: 
92 
fixes f :: "'a \<Rightarrow> real" 

93 
assumes "1 < b" 

94 
assumes f: "f \<in> borel_measurable M" "AE x in M. 0 \<le> f x" 

95 
shows "KL_divergence b M (density M f) = (\<integral>x. f x * log b (f x) \<partial>M)" 

96 
unfolding KL_divergence_def 

97 
proof (subst integral_density) 

98 
show "entropy_density b M (density M (\<lambda>x. ereal (f x))) \<in> borel_measurable M" 

49776  99 
using f 
47694  100 
by (auto simp: comp_def entropy_density_def intro!: borel_measurable_log borel_measurable_RN_deriv_density) 
101 
have "density M (RN_deriv M (density M f)) = density M f" 

102 
using f by (intro density_RN_deriv_density) auto 

103 
then have eq: "AE x in M. RN_deriv M (density M f) x = f x" 

104 
using f 

105 
by (intro density_unique) 

106 
(auto intro!: borel_measurable_log borel_measurable_RN_deriv_density simp: RN_deriv_density_nonneg) 

107 
show "(\<integral>x. f x * entropy_density b M (density M (\<lambda>x. ereal (f x))) x \<partial>M) = (\<integral>x. f x * log b (f x) \<partial>M)" 

108 
apply (intro integral_cong_AE) 

109 
using eq 

110 
apply eventually_elim 

111 
apply (auto simp: entropy_density_def) 

112 
done 

113 
qed fact+ 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

114 

47694  115 
lemma (in sigma_finite_measure) KL_density_density: 
116 
fixes f g :: "'a \<Rightarrow> real" 

117 
assumes "1 < b" 

118 
assumes f: "f \<in> borel_measurable M" "AE x in M. 0 \<le> f x" 

119 
assumes g: "g \<in> borel_measurable M" "AE x in M. 0 \<le> g x" 

120 
assumes ac: "AE x in M. f x = 0 \<longrightarrow> g x = 0" 

121 
shows "KL_divergence b (density M f) (density M g) = (\<integral>x. g x * log b (g x / f x) \<partial>M)" 

122 
proof  

123 
interpret Mf: sigma_finite_measure "density M f" 

124 
using f by (subst sigma_finite_iff_density_finite) auto 

125 
have "KL_divergence b (density M f) (density M g) = 

126 
KL_divergence b (density M f) (density (density M f) (\<lambda>x. g x / f x))" 

127 
using f g ac by (subst density_density_divide) simp_all 

128 
also have "\<dots> = (\<integral>x. (g x / f x) * log b (g x / f x) \<partial>density M f)" 

129 
using f g `1 < b` by (intro Mf.KL_density) (auto simp: AE_density divide_nonneg_nonneg) 

130 
also have "\<dots> = (\<integral>x. g x * log b (g x / f x) \<partial>M)" 

131 
using ac f g `1 < b` by (subst integral_density) (auto intro!: integral_cong_AE) 

132 
finally show ?thesis . 

133 
qed 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

134 

47694  135 
lemma (in information_space) KL_gt_0: 
136 
fixes D :: "'a \<Rightarrow> real" 

137 
assumes "prob_space (density M D)" 

138 
assumes D: "D \<in> borel_measurable M" "AE x in M. 0 \<le> D x" 

139 
assumes int: "integrable M (\<lambda>x. D x * log b (D x))" 

140 
assumes A: "density M D \<noteq> M" 

141 
shows "0 < KL_divergence b M (density M D)" 

142 
proof  

143 
interpret N: prob_space "density M D" by fact 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

144 

47694  145 
obtain A where "A \<in> sets M" "emeasure (density M D) A \<noteq> emeasure M A" 
146 
using measure_eqI[of "density M D" M] `density M D \<noteq> M` by auto 

147 

148 
let ?D_set = "{x\<in>space M. D x \<noteq> 0}" 

149 
have [simp, intro]: "?D_set \<in> sets M" 

150 
using D by auto 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

151 

43920  152 
have D_neg: "(\<integral>\<^isup>+ x. ereal ( D x) \<partial>M) = 0" 
43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

153 
using D by (subst positive_integral_0_iff_AE) auto 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

154 

47694  155 
have "(\<integral>\<^isup>+ x. ereal (D x) \<partial>M) = emeasure (density M D) (space M)" 
156 
using D by (simp add: emeasure_density cong: positive_integral_cong) 

43920  157 
then have D_pos: "(\<integral>\<^isup>+ x. ereal (D x) \<partial>M) = 1" 
47694  158 
using N.emeasure_space_1 by simp 
43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

159 

47694  160 
have "integrable M D" "integral\<^isup>L M D = 1" 
161 
using D D_pos D_neg unfolding integrable_def lebesgue_integral_def by simp_all 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

162 

47694  163 
have "0 \<le> 1  measure M ?D_set" 
43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

164 
using prob_le_1 by (auto simp: field_simps) 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

165 
also have "\<dots> = (\<integral> x. D x  indicator ?D_set x \<partial>M)" 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

166 
using `integrable M D` `integral\<^isup>L M D = 1` 
47694  167 
by (simp add: emeasure_eq_measure) 
168 
also have "\<dots> < (\<integral> x. D x * (ln b * log b (D x)) \<partial>M)" 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

169 
proof (rule integral_less_AE) 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

170 
show "integrable M (\<lambda>x. D x  indicator ?D_set x)" 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

171 
using `integrable M D` 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

172 
by (intro integral_diff integral_indicator) auto 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

173 
next 
47694  174 
from integral_cmult(1)[OF int, of "ln b"] 
175 
show "integrable M (\<lambda>x. D x * (ln b * log b (D x)))" 

176 
by (simp add: ac_simps) 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

177 
next 
47694  178 
show "emeasure M {x\<in>space M. D x \<noteq> 1 \<and> D x \<noteq> 0} \<noteq> 0" 
43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

179 
proof 
47694  180 
assume eq_0: "emeasure M {x\<in>space M. D x \<noteq> 1 \<and> D x \<noteq> 0} = 0" 
181 
then have disj: "AE x in M. D x = 1 \<or> D x = 0" 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

182 
using D(1) by (auto intro!: AE_I[OF subset_refl] sets_Collect) 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

183 

47694  184 
have "emeasure M {x\<in>space M. D x = 1} = (\<integral>\<^isup>+ x. indicator {x\<in>space M. D x = 1} x \<partial>M)" 
43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

185 
using D(1) by auto 
47694  186 
also have "\<dots> = (\<integral>\<^isup>+ x. ereal (D x) \<partial>M)" 
43920  187 
using disj by (auto intro!: positive_integral_cong_AE simp: indicator_def one_ereal_def) 
47694  188 
finally have "AE x in M. D x = 1" 
189 
using D D_pos by (intro AE_I_eq_1) auto 

43920  190 
then have "(\<integral>\<^isup>+x. indicator A x\<partial>M) = (\<integral>\<^isup>+x. ereal (D x) * indicator A x\<partial>M)" 
191 
by (intro positive_integral_cong_AE) (auto simp: one_ereal_def[symmetric]) 

47694  192 
also have "\<dots> = density M D A" 
193 
using `A \<in> sets M` D by (simp add: emeasure_density) 

194 
finally show False using `A \<in> sets M` `emeasure (density M D) A \<noteq> emeasure M A` by simp 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

195 
qed 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

196 
show "{x\<in>space M. D x \<noteq> 1 \<and> D x \<noteq> 0} \<in> sets M" 
47694  197 
using D(1) by (auto intro: sets_Collect_conj) 
43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

198 

47694  199 
show "AE t in M. t \<in> {x\<in>space M. D x \<noteq> 1 \<and> D x \<noteq> 0} \<longrightarrow> 
200 
D t  indicator ?D_set t \<noteq> D t * (ln b * log b (D t))" 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

201 
using D(2) 
47694  202 
proof (eventually_elim, safe) 
203 
fix t assume Dt: "t \<in> space M" "D t \<noteq> 1" "D t \<noteq> 0" "0 \<le> D t" 

204 
and eq: "D t  indicator ?D_set t = D t * (ln b * log b (D t))" 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

205 

60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

206 
have "D t  1 = D t  indicator ?D_set t" 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

207 
using Dt by simp 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

208 
also note eq 
47694  209 
also have "D t * (ln b * log b (D t)) =  D t * ln (1 / D t)" 
210 
using b_gt_1 `D t \<noteq> 0` `0 \<le> D t` 

211 
by (simp add: log_def ln_div less_le) 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

212 
finally have "ln (1 / D t) = 1 / D t  1" 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

213 
using `D t \<noteq> 0` by (auto simp: field_simps) 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

214 
from ln_eq_minus_one[OF _ this] `D t \<noteq> 0` `0 \<le> D t` `D t \<noteq> 1` 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

215 
show False by auto 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

216 
qed 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

217 

47694  218 
show "AE t in M. D t  indicator ?D_set t \<le> D t * (ln b * log b (D t))" 
219 
using D(2) AE_space 

220 
proof eventually_elim 

221 
fix t assume "t \<in> space M" "0 \<le> D t" 

222 
show "D t  indicator ?D_set t \<le> D t * (ln b * log b (D t))" 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

223 
proof cases 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

224 
assume asm: "D t \<noteq> 0" 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

225 
then have "0 < D t" using `0 \<le> D t` by auto 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

226 
then have "0 < 1 / D t" by auto 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

227 
have "D t  indicator ?D_set t \<le>  D t * (1 / D t  1)" 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

228 
using asm `t \<in> space M` by (simp add: field_simps) 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

229 
also have " D t * (1 / D t  1) \<le>  D t * ln (1 / D t)" 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

230 
using ln_le_minus_one `0 < 1 / D t` by (intro mult_left_mono_neg) auto 
47694  231 
also have "\<dots> = D t * (ln b * log b (D t))" 
232 
using `0 < D t` b_gt_1 

233 
by (simp_all add: log_def ln_div) 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

234 
finally show ?thesis by simp 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

235 
qed simp 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

236 
qed 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

237 
qed 
47694  238 
also have "\<dots> = (\<integral> x. ln b * (D x * log b (D x)) \<partial>M)" 
239 
by (simp add: ac_simps) 

240 
also have "\<dots> = ln b * (\<integral> x. D x * log b (D x) \<partial>M)" 

241 
using int by (rule integral_cmult) 

242 
finally show ?thesis 

243 
using b_gt_1 D by (subst KL_density) (auto simp: zero_less_mult_iff) 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

244 
qed 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

245 

47694  246 
lemma (in sigma_finite_measure) KL_same_eq_0: "KL_divergence b M M = 0" 
43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

247 
proof  
47694  248 
have "AE x in M. 1 = RN_deriv M M x" 
43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

249 
proof (rule RN_deriv_unique) 
47694  250 
show "(\<lambda>x. 1) \<in> borel_measurable M" "AE x in M. 0 \<le> (1 :: ereal)" by auto 
251 
show "density M (\<lambda>x. 1) = M" 

252 
apply (auto intro!: measure_eqI emeasure_density) 

253 
apply (subst emeasure_density) 

254 
apply auto 

255 
done 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

256 
qed 
47694  257 
then have "AE x in M. log b (real (RN_deriv M M x)) = 0" 
43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

258 
by (elim AE_mp) simp 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

259 
from integral_cong_AE[OF this] 
47694  260 
have "integral\<^isup>L M (entropy_density b M M) = 0" 
43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

261 
by (simp add: entropy_density_def comp_def) 
47694  262 
then show "KL_divergence b M M = 0" 
43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

263 
unfolding KL_divergence_def 
47694  264 
by auto 
43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

265 
qed 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

266 

47694  267 
lemma (in information_space) KL_eq_0_iff_eq: 
268 
fixes D :: "'a \<Rightarrow> real" 

269 
assumes "prob_space (density M D)" 

270 
assumes D: "D \<in> borel_measurable M" "AE x in M. 0 \<le> D x" 

271 
assumes int: "integrable M (\<lambda>x. D x * log b (D x))" 

272 
shows "KL_divergence b M (density M D) = 0 \<longleftrightarrow> density M D = M" 

273 
using KL_same_eq_0[of b] KL_gt_0[OF assms] 

274 
by (auto simp: less_le) 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

275 

47694  276 
lemma (in information_space) KL_eq_0_iff_eq_ac: 
277 
fixes D :: "'a \<Rightarrow> real" 

278 
assumes "prob_space N" 

279 
assumes ac: "absolutely_continuous M N" "sets N = sets M" 

280 
assumes int: "integrable N (entropy_density b M N)" 

281 
shows "KL_divergence b M N = 0 \<longleftrightarrow> N = M" 

41833
563bea92b2c0
add lemma KL_divergence_vimage, mutual_information_generic
hoelzl
parents:
41689
diff
changeset

282 
proof  
47694  283 
interpret N: prob_space N by fact 
284 
have "finite_measure N" by unfold_locales 

285 
from real_RN_deriv[OF this ac] guess D . note D = this 

286 

287 
have "N = density M (RN_deriv M N)" 

288 
using ac by (rule density_RN_deriv[symmetric]) 

289 
also have "\<dots> = density M D" 

290 
using borel_measurable_RN_deriv[OF ac] D by (auto intro!: density_cong) 

291 
finally have N: "N = density M D" . 

41833
563bea92b2c0
add lemma KL_divergence_vimage, mutual_information_generic
hoelzl
parents:
41689
diff
changeset

292 

47694  293 
from absolutely_continuous_AE[OF ac(2,1) D(2)] D b_gt_1 ac measurable_entropy_density 
294 
have "integrable N (\<lambda>x. log b (D x))" 

295 
by (intro integrable_cong_AE[THEN iffD2, OF _ _ _ int]) 

296 
(auto simp: N entropy_density_def) 

297 
with D b_gt_1 have "integrable M (\<lambda>x. D x * log b (D x))" 

298 
by (subst integral_density(2)[symmetric]) (auto simp: N[symmetric] comp_def) 

299 
with `prob_space N` D show ?thesis 

300 
unfolding N 

301 
by (intro KL_eq_0_iff_eq) auto 

41833
563bea92b2c0
add lemma KL_divergence_vimage, mutual_information_generic
hoelzl
parents:
41689
diff
changeset

302 
qed 
563bea92b2c0
add lemma KL_divergence_vimage, mutual_information_generic
hoelzl
parents:
41689
diff
changeset

303 

47694  304 
lemma (in information_space) KL_nonneg: 
305 
assumes "prob_space (density M D)" 

306 
assumes D: "D \<in> borel_measurable M" "AE x in M. 0 \<le> D x" 

307 
assumes int: "integrable M (\<lambda>x. D x * log b (D x))" 

308 
shows "0 \<le> KL_divergence b M (density M D)" 

309 
using KL_gt_0[OF assms] by (cases "density M D = M") (auto simp: KL_same_eq_0) 

40859  310 

47694  311 
lemma (in sigma_finite_measure) KL_density_density_nonneg: 
312 
fixes f g :: "'a \<Rightarrow> real" 

313 
assumes "1 < b" 

314 
assumes f: "f \<in> borel_measurable M" "AE x in M. 0 \<le> f x" "prob_space (density M f)" 

315 
assumes g: "g \<in> borel_measurable M" "AE x in M. 0 \<le> g x" "prob_space (density M g)" 

316 
assumes ac: "AE x in M. f x = 0 \<longrightarrow> g x = 0" 

317 
assumes int: "integrable M (\<lambda>x. g x * log b (g x / f x))" 

318 
shows "0 \<le> KL_divergence b (density M f) (density M g)" 

319 
proof  

320 
interpret Mf: prob_space "density M f" by fact 

321 
interpret Mf: information_space "density M f" b by default fact 

322 
have eq: "density (density M f) (\<lambda>x. g x / f x) = density M g" (is "?DD = _") 

323 
using f g ac by (subst density_density_divide) simp_all 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

324 

47694  325 
have "0 \<le> KL_divergence b (density M f) (density (density M f) (\<lambda>x. g x / f x))" 
326 
proof (rule Mf.KL_nonneg) 

327 
show "prob_space ?DD" unfolding eq by fact 

328 
from f g show "(\<lambda>x. g x / f x) \<in> borel_measurable (density M f)" 

329 
by auto 

330 
show "AE x in density M f. 0 \<le> g x / f x" 

331 
using f g by (auto simp: AE_density divide_nonneg_nonneg) 

332 
show "integrable (density M f) (\<lambda>x. g x / f x * log b (g x / f x))" 

333 
using `1 < b` f g ac 

334 
by (subst integral_density) 

335 
(auto intro!: integrable_cong_AE[THEN iffD2, OF _ _ _ int] measurable_If) 

336 
qed 

337 
also have "\<dots> = KL_divergence b (density M f) (density M g)" 

338 
using f g ac by (subst density_density_divide) simp_all 

339 
finally show ?thesis . 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

340 
qed 
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

341 

39097  342 
subsection {* Mutual Information *} 
343 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

344 
definition (in prob_space) 
38656  345 
"mutual_information b S T X Y = 
47694  346 
KL_divergence b (distr M S X \<Otimes>\<^isub>M distr M T Y) (distr M (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x)))" 
36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

347 

47694  348 
lemma (in information_space) mutual_information_indep_vars: 
43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

349 
fixes S T X Y 
47694  350 
defines "P \<equiv> distr M S X \<Otimes>\<^isub>M distr M T Y" 
351 
defines "Q \<equiv> distr M (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x))" 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

352 
shows "indep_var S X T Y \<longleftrightarrow> 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

353 
(random_variable S X \<and> random_variable T Y \<and> 
47694  354 
absolutely_continuous P Q \<and> integrable Q (entropy_density b P Q) \<and> 
355 
mutual_information b S T X Y = 0)" 

356 
unfolding indep_var_distribution_eq 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

357 
proof safe 
47694  358 
assume rv: "random_variable S X" "random_variable T Y" 
43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

359 

47694  360 
interpret X: prob_space "distr M S X" 
361 
by (rule prob_space_distr) fact 

362 
interpret Y: prob_space "distr M T Y" 

363 
by (rule prob_space_distr) fact 

364 
interpret XY: pair_prob_space "distr M S X" "distr M T Y" by default 

365 
interpret P: information_space P b unfolding P_def by default (rule b_gt_1) 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

366 

47694  367 
interpret Q: prob_space Q unfolding Q_def 
368 
by (rule prob_space_distr) (simp add: comp_def measurable_pair_iff rv) 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

369 

47694  370 
{ assume "distr M S X \<Otimes>\<^isub>M distr M T Y = distr M (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x))" 
371 
then have [simp]: "Q = P" unfolding Q_def P_def by simp 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

372 

47694  373 
show ac: "absolutely_continuous P Q" by (simp add: absolutely_continuous_def) 
374 
then have ed: "entropy_density b P Q \<in> borel_measurable P" 

375 
by (rule P.measurable_entropy_density) simp 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

376 

47694  377 
have "AE x in P. 1 = RN_deriv P Q x" 
378 
proof (rule P.RN_deriv_unique) 

379 
show "density P (\<lambda>x. 1) = Q" 

380 
unfolding `Q = P` by (intro measure_eqI) (auto simp: emeasure_density) 

381 
qed auto 

382 
then have ae_0: "AE x in P. entropy_density b P Q x = 0" 

383 
by eventually_elim (auto simp: entropy_density_def) 

384 
then have "integrable P (entropy_density b P Q) \<longleftrightarrow> integrable Q (\<lambda>x. 0)" 

385 
using ed unfolding `Q = P` by (intro integrable_cong_AE) auto 

386 
then show "integrable Q (entropy_density b P Q)" by simp 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

387 

47694  388 
show "mutual_information b S T X Y = 0" 
389 
unfolding mutual_information_def KL_divergence_def P_def[symmetric] Q_def[symmetric] `Q = P` 

390 
using ae_0 by (simp cong: integral_cong_AE) } 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

391 

47694  392 
{ assume ac: "absolutely_continuous P Q" 
393 
assume int: "integrable Q (entropy_density b P Q)" 

394 
assume I_eq_0: "mutual_information b S T X Y = 0" 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

395 

47694  396 
have eq: "Q = P" 
397 
proof (rule P.KL_eq_0_iff_eq_ac[THEN iffD1]) 

398 
show "prob_space Q" by unfold_locales 

399 
show "absolutely_continuous P Q" by fact 

400 
show "integrable Q (entropy_density b P Q)" by fact 

401 
show "sets Q = sets P" by (simp add: P_def Q_def sets_pair_measure) 

402 
show "KL_divergence b P Q = 0" 

403 
using I_eq_0 unfolding mutual_information_def by (simp add: P_def Q_def) 

404 
qed 

405 
then show "distr M S X \<Otimes>\<^isub>M distr M T Y = distr M (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x))" 

406 
unfolding P_def Q_def .. } 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

407 
qed 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

408 

40859  409 
abbreviation (in information_space) 
410 
mutual_information_Pow ("\<I>'(_ ; _')") where 

47694  411 
"\<I>(X ; Y) \<equiv> mutual_information b (count_space (X`space M)) (count_space (Y`space M)) X Y" 
41689
3e39b0e730d6
the measure valuation is again part of the measure_space type, instead of an explicit parameter to the locale;
hoelzl
parents:
41661
diff
changeset

412 

47694  413 
lemma (in information_space) 
414 
fixes Pxy :: "'b \<times> 'c \<Rightarrow> real" and Px :: "'b \<Rightarrow> real" and Py :: "'c \<Rightarrow> real" 

415 
assumes "sigma_finite_measure S" "sigma_finite_measure T" 

416 
assumes Px: "distributed M S X Px" and Py: "distributed M T Y Py" 

417 
assumes Pxy: "distributed M (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x)) Pxy" 

418 
defines "f \<equiv> \<lambda>x. Pxy x * log b (Pxy x / (Px (fst x) * Py (snd x)))" 

419 
shows mutual_information_distr: "mutual_information b S T X Y = integral\<^isup>L (S \<Otimes>\<^isub>M T) f" (is "?M = ?R") 

420 
and mutual_information_nonneg: "integrable (S \<Otimes>\<^isub>M T) f \<Longrightarrow> 0 \<le> mutual_information b S T X Y" 

40859  421 
proof  
47694  422 
have X: "random_variable S X" 
423 
using Px by (auto simp: distributed_def) 

424 
have Y: "random_variable T Y" 

425 
using Py by (auto simp: distributed_def) 

426 
interpret S: sigma_finite_measure S by fact 

427 
interpret T: sigma_finite_measure T by fact 

428 
interpret ST: pair_sigma_finite S T .. 

429 
interpret X: prob_space "distr M S X" using X by (rule prob_space_distr) 

430 
interpret Y: prob_space "distr M T Y" using Y by (rule prob_space_distr) 

431 
interpret XY: pair_prob_space "distr M S X" "distr M T Y" .. 

432 
let ?P = "S \<Otimes>\<^isub>M T" 

433 
let ?D = "distr M ?P (\<lambda>x. (X x, Y x))" 

434 

435 
{ fix A assume "A \<in> sets S" 

436 
with X Y have "emeasure (distr M S X) A = emeasure ?D (A \<times> space T)" 

437 
by (auto simp: emeasure_distr measurable_Pair measurable_space 

438 
intro!: arg_cong[where f="emeasure M"]) } 

439 
note marginal_eq1 = this 

440 
{ fix A assume "A \<in> sets T" 

441 
with X Y have "emeasure (distr M T Y) A = emeasure ?D (space S \<times> A)" 

442 
by (auto simp: emeasure_distr measurable_Pair measurable_space 

443 
intro!: arg_cong[where f="emeasure M"]) } 

444 
note marginal_eq2 = this 

445 

446 
have eq: "(\<lambda>x. ereal (Px (fst x) * Py (snd x))) = (\<lambda>(x, y). ereal (Px x) * ereal (Py y))" 

447 
by auto 

448 

449 
have distr_eq: "distr M S X \<Otimes>\<^isub>M distr M T Y = density ?P (\<lambda>x. ereal (Px (fst x) * Py (snd x)))" 

450 
unfolding Px(1)[THEN distributed_distr_eq_density] Py(1)[THEN distributed_distr_eq_density] eq 

451 
proof (subst pair_measure_density) 

452 
show "(\<lambda>x. ereal (Px x)) \<in> borel_measurable S" "(\<lambda>y. ereal (Py y)) \<in> borel_measurable T" 

453 
"AE x in S. 0 \<le> ereal (Px x)" "AE y in T. 0 \<le> ereal (Py y)" 

454 
using Px Py by (auto simp: distributed_def) 

455 
show "sigma_finite_measure (density S Px)" unfolding Px(1)[THEN distributed_distr_eq_density, symmetric] .. 

456 
show "sigma_finite_measure (density T Py)" unfolding Py(1)[THEN distributed_distr_eq_density, symmetric] .. 

457 
qed (fact  simp)+ 

458 

459 
have M: "?M = KL_divergence b (density ?P (\<lambda>x. ereal (Px (fst x) * Py (snd x)))) (density ?P (\<lambda>x. ereal (Pxy x)))" 

460 
unfolding mutual_information_def distr_eq Pxy(1)[THEN distributed_distr_eq_density] .. 

461 

462 
from Px Py have f: "(\<lambda>x. Px (fst x) * Py (snd x)) \<in> borel_measurable ?P" 

463 
by (intro borel_measurable_times) (auto intro: distributed_real_measurable measurable_fst'' measurable_snd'') 

464 
have PxPy_nonneg: "AE x in ?P. 0 \<le> Px (fst x) * Py (snd x)" 

465 
proof (rule ST.AE_pair_measure) 

466 
show "{x \<in> space ?P. 0 \<le> Px (fst x) * Py (snd x)} \<in> sets ?P" 

467 
using f by auto 

468 
show "AE x in S. AE y in T. 0 \<le> Px (fst (x, y)) * Py (snd (x, y))" 

469 
using Px Py by (auto simp: zero_le_mult_iff dest!: distributed_real_AE) 

470 
qed 

471 

472 
have "(AE x in ?P. Px (fst x) = 0 \<longrightarrow> Pxy x = 0)" 

473 
by (rule subdensity_real[OF measurable_fst Pxy Px]) auto 

474 
moreover 

475 
have "(AE x in ?P. Py (snd x) = 0 \<longrightarrow> Pxy x = 0)" 

476 
by (rule subdensity_real[OF measurable_snd Pxy Py]) auto 

477 
ultimately have ac: "AE x in ?P. Px (fst x) * Py (snd x) = 0 \<longrightarrow> Pxy x = 0" 

478 
by eventually_elim auto 

479 

480 
show "?M = ?R" 

481 
unfolding M f_def 

482 
using b_gt_1 f PxPy_nonneg Pxy[THEN distributed_real_measurable] Pxy[THEN distributed_real_AE] ac 

483 
by (rule ST.KL_density_density) 

484 

485 
assume int: "integrable (S \<Otimes>\<^isub>M T) f" 

486 
show "0 \<le> ?M" unfolding M 

487 
proof (rule ST.KL_density_density_nonneg 

488 
[OF b_gt_1 f PxPy_nonneg _ Pxy[THEN distributed_real_measurable] Pxy[THEN distributed_real_AE] _ ac int[unfolded f_def]]) 

489 
show "prob_space (density (S \<Otimes>\<^isub>M T) (\<lambda>x. ereal (Pxy x))) " 

490 
unfolding distributed_distr_eq_density[OF Pxy, symmetric] 

491 
using distributed_measurable[OF Pxy] by (rule prob_space_distr) 

492 
show "prob_space (density (S \<Otimes>\<^isub>M T) (\<lambda>x. ereal (Px (fst x) * Py (snd x))))" 

493 
unfolding distr_eq[symmetric] by unfold_locales 

40859  494 
qed 
495 
qed 

496 

497 
lemma (in information_space) 

47694  498 
fixes Pxy :: "'b \<times> 'c \<Rightarrow> real" and Px :: "'b \<Rightarrow> real" and Py :: "'c \<Rightarrow> real" 
499 
assumes "sigma_finite_measure S" "sigma_finite_measure T" 

500 
assumes Px: "distributed M S X Px" and Py: "distributed M T Y Py" 

501 
assumes Pxy: "distributed M (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x)) Pxy" 

502 
assumes ae: "AE x in S. AE y in T. Pxy (x, y) = Px x * Py y" 

503 
shows mutual_information_eq_0: "mutual_information b S T X Y = 0" 

36624  504 
proof  
47694  505 
interpret S: sigma_finite_measure S by fact 
506 
interpret T: sigma_finite_measure T by fact 

507 
interpret ST: pair_sigma_finite S T .. 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

508 

47694  509 
have "AE x in S \<Otimes>\<^isub>M T. Px (fst x) = 0 \<longrightarrow> Pxy x = 0" 
510 
by (rule subdensity_real[OF measurable_fst Pxy Px]) auto 

511 
moreover 

512 
have "AE x in S \<Otimes>\<^isub>M T. Py (snd x) = 0 \<longrightarrow> Pxy x = 0" 

513 
by (rule subdensity_real[OF measurable_snd Pxy Py]) auto 

514 
moreover 

515 
have "AE x in S \<Otimes>\<^isub>M T. Pxy x = Px (fst x) * Py (snd x)" 

516 
using distributed_real_measurable[OF Px] distributed_real_measurable[OF Py] distributed_real_measurable[OF Pxy] 

517 
by (intro ST.AE_pair_measure) (auto simp: ae intro!: measurable_snd'' measurable_fst'') 

518 
ultimately have "AE x in S \<Otimes>\<^isub>M T. Pxy x * log b (Pxy x / (Px (fst x) * Py (snd x))) = 0" 

519 
by eventually_elim simp 

520 
then have "(\<integral>x. Pxy x * log b (Pxy x / (Px (fst x) * Py (snd x))) \<partial>(S \<Otimes>\<^isub>M T)) = (\<integral>x. 0 \<partial>(S \<Otimes>\<^isub>M T))" 

521 
by (rule integral_cong_AE) 

522 
then show ?thesis 

523 
by (subst mutual_information_distr[OF assms(15)]) simp 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

524 
qed 
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

525 

47694  526 
lemma (in information_space) mutual_information_simple_distributed: 
527 
assumes X: "simple_distributed M X Px" and Y: "simple_distributed M Y Py" 

528 
assumes XY: "simple_distributed M (\<lambda>x. (X x, Y x)) Pxy" 

529 
shows "\<I>(X ; Y) = (\<Sum>(x, y)\<in>(\<lambda>x. (X x, Y x))`space M. Pxy (x, y) * log b (Pxy (x, y) / (Px x * Py y)))" 

530 
proof (subst mutual_information_distr[OF _ _ simple_distributed[OF X] simple_distributed[OF Y] simple_distributed_joint[OF XY]]) 

531 
note fin = simple_distributed_joint_finite[OF XY, simp] 

532 
show "sigma_finite_measure (count_space (X ` space M))" 

533 
by (simp add: sigma_finite_measure_count_space_finite) 

534 
show "sigma_finite_measure (count_space (Y ` space M))" 

535 
by (simp add: sigma_finite_measure_count_space_finite) 

536 
let ?Pxy = "\<lambda>x. (if x \<in> (\<lambda>x. (X x, Y x)) ` space M then Pxy x else 0)" 

537 
let ?f = "\<lambda>x. ?Pxy x * log b (?Pxy x / (Px (fst x) * Py (snd x)))" 

538 
have "\<And>x. ?f x = (if x \<in> (\<lambda>x. (X x, Y x)) ` space M then Pxy x * log b (Pxy x / (Px (fst x) * Py (snd x))) else 0)" 

539 
by auto 

540 
with fin show "(\<integral> x. ?f x \<partial>(count_space (X ` space M) \<Otimes>\<^isub>M count_space (Y ` space M))) = 

541 
(\<Sum>(x, y)\<in>(\<lambda>x. (X x, Y x)) ` space M. Pxy (x, y) * log b (Pxy (x, y) / (Px x * Py y)))" 

542 
by (auto simp add: pair_measure_count_space lebesgue_integral_count_space_finite setsum_cases split_beta' 

543 
intro!: setsum_cong) 

544 
qed 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

545 

47694  546 
lemma (in information_space) 
547 
fixes Pxy :: "'b \<times> 'c \<Rightarrow> real" and Px :: "'b \<Rightarrow> real" and Py :: "'c \<Rightarrow> real" 

548 
assumes Px: "simple_distributed M X Px" and Py: "simple_distributed M Y Py" 

549 
assumes Pxy: "simple_distributed M (\<lambda>x. (X x, Y x)) Pxy" 

550 
assumes ae: "\<forall>x\<in>space M. Pxy (X x, Y x) = Px (X x) * Py (Y x)" 

551 
shows mutual_information_eq_0_simple: "\<I>(X ; Y) = 0" 

552 
proof (subst mutual_information_simple_distributed[OF Px Py Pxy]) 

553 
have "(\<Sum>(x, y)\<in>(\<lambda>x. (X x, Y x)) ` space M. Pxy (x, y) * log b (Pxy (x, y) / (Px x * Py y))) = 

554 
(\<Sum>(x, y)\<in>(\<lambda>x. (X x, Y x)) ` space M. 0)" 

555 
by (intro setsum_cong) (auto simp: ae) 

556 
then show "(\<Sum>(x, y)\<in>(\<lambda>x. (X x, Y x)) ` space M. 

557 
Pxy (x, y) * log b (Pxy (x, y) / (Px x * Py y))) = 0" by simp 

558 
qed 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

559 

39097  560 
subsection {* Entropy *} 
561 

47694  562 
definition (in prob_space) entropy :: "real \<Rightarrow> 'b measure \<Rightarrow> ('a \<Rightarrow> 'b) \<Rightarrow> real" where 
563 
"entropy b S X =  KL_divergence b S (distr M S X)" 

564 

40859  565 
abbreviation (in information_space) 
566 
entropy_Pow ("\<H>'(_')") where 

47694  567 
"\<H>(X) \<equiv> entropy b (count_space (X`space M)) X" 
41981
cdf7693bbe08
reworked Probability theory: measures are not type restricted to positive extended reals
hoelzl
parents:
41833
diff
changeset

568 

49791  569 
lemma (in prob_space) distributed_RN_deriv: 
570 
assumes X: "distributed M S X Px" 

571 
shows "AE x in S. RN_deriv S (density S Px) x = Px x" 

572 
proof  

573 
note D = distributed_measurable[OF X] distributed_borel_measurable[OF X] distributed_AE[OF X] 

574 
interpret X: prob_space "distr M S X" 

575 
using D(1) by (rule prob_space_distr) 

576 

577 
have sf: "sigma_finite_measure (distr M S X)" by default 

578 
show ?thesis 

579 
using D 

580 
apply (subst eq_commute) 

581 
apply (intro RN_deriv_unique_sigma_finite) 

582 
apply (auto intro: divide_nonneg_nonneg measure_nonneg 

583 
simp: distributed_distr_eq_density[symmetric, OF X] sf) 

584 
done 

585 
qed 

586 

49788
3c10763f5cb4
show and use distributed_swap and distributed_jointI
hoelzl
parents:
49787
diff
changeset

587 
lemma (in information_space) 
47694  588 
fixes X :: "'a \<Rightarrow> 'b" 
49785  589 
assumes X: "distributed M MX X f" 
49788
3c10763f5cb4
show and use distributed_swap and distributed_jointI
hoelzl
parents:
49787
diff
changeset

590 
shows entropy_distr: "entropy b MX X =  (\<integral>x. f x * log b (f x) \<partial>MX)" (is ?eq) 
3c10763f5cb4
show and use distributed_swap and distributed_jointI
hoelzl
parents:
49787
diff
changeset

591 
proof  
49785  592 
note D = distributed_measurable[OF X] distributed_borel_measurable[OF X] distributed_AE[OF X] 
49791  593 
note ae = distributed_RN_deriv[OF X] 
49788
3c10763f5cb4
show and use distributed_swap and distributed_jointI
hoelzl
parents:
49787
diff
changeset

594 

3c10763f5cb4
show and use distributed_swap and distributed_jointI
hoelzl
parents:
49787
diff
changeset

595 
have ae_eq: "AE x in distr M MX X. log b (real (RN_deriv MX (distr M MX X) x)) = 
49785  596 
log b (f x)" 
597 
unfolding distributed_distr_eq_density[OF X] 

598 
apply (subst AE_density) 

599 
using D apply simp 

600 
using ae apply eventually_elim 

601 
apply auto 

602 
done 

49788
3c10763f5cb4
show and use distributed_swap and distributed_jointI
hoelzl
parents:
49787
diff
changeset

603 

3c10763f5cb4
show and use distributed_swap and distributed_jointI
hoelzl
parents:
49787
diff
changeset

604 
have int_eq: " (\<integral> x. log b (f x) \<partial>distr M MX X) =  (\<integral> x. f x * log b (f x) \<partial>MX)" 
49785  605 
unfolding distributed_distr_eq_density[OF X] 
606 
using D 

607 
by (subst integral_density) 

608 
(auto simp: borel_measurable_ereal_iff) 

49788
3c10763f5cb4
show and use distributed_swap and distributed_jointI
hoelzl
parents:
49787
diff
changeset

609 

3c10763f5cb4
show and use distributed_swap and distributed_jointI
hoelzl
parents:
49787
diff
changeset

610 
show ?eq 
3c10763f5cb4
show and use distributed_swap and distributed_jointI
hoelzl
parents:
49787
diff
changeset

611 
unfolding entropy_def KL_divergence_def entropy_density_def comp_def 
3c10763f5cb4
show and use distributed_swap and distributed_jointI
hoelzl
parents:
49787
diff
changeset

612 
apply (subst integral_cong_AE) 
3c10763f5cb4
show and use distributed_swap and distributed_jointI
hoelzl
parents:
49787
diff
changeset

613 
apply (rule ae_eq) 
3c10763f5cb4
show and use distributed_swap and distributed_jointI
hoelzl
parents:
49787
diff
changeset

614 
apply (rule int_eq) 
3c10763f5cb4
show and use distributed_swap and distributed_jointI
hoelzl
parents:
49787
diff
changeset

615 
done 
3c10763f5cb4
show and use distributed_swap and distributed_jointI
hoelzl
parents:
49787
diff
changeset

616 
qed 
49785  617 

49786  618 
lemma (in prob_space) distributed_imp_emeasure_nonzero: 
619 
assumes X: "distributed M MX X Px" 

620 
shows "emeasure MX {x \<in> space MX. Px x \<noteq> 0} \<noteq> 0" 

621 
proof 

622 
note Px = distributed_borel_measurable[OF X] distributed_AE[OF X] 

623 
interpret X: prob_space "distr M MX X" 

624 
using distributed_measurable[OF X] by (rule prob_space_distr) 

625 

626 
assume "emeasure MX {x \<in> space MX. Px x \<noteq> 0} = 0" 

627 
with Px have "AE x in MX. Px x = 0" 

628 
by (intro AE_I[OF subset_refl]) (auto simp: borel_measurable_ereal_iff) 

629 
moreover 

630 
from X.emeasure_space_1 have "(\<integral>\<^isup>+x. Px x \<partial>MX) = 1" 

631 
unfolding distributed_distr_eq_density[OF X] using Px 

632 
by (subst (asm) emeasure_density) 

633 
(auto simp: borel_measurable_ereal_iff intro!: integral_cong cong: positive_integral_cong) 

634 
ultimately show False 

635 
by (simp add: positive_integral_cong_AE) 

636 
qed 

637 

638 
lemma (in information_space) entropy_le: 

639 
fixes Px :: "'b \<Rightarrow> real" and MX :: "'b measure" 

640 
assumes X: "distributed M MX X Px" 

641 
and fin: "emeasure MX {x \<in> space MX. Px x \<noteq> 0} \<noteq> \<infinity>" 

642 
and int: "integrable MX (\<lambda>x.  Px x * log b (Px x))" 

643 
shows "entropy b MX X \<le> log b (measure MX {x \<in> space MX. Px x \<noteq> 0})" 

644 
proof  

645 
note Px = distributed_borel_measurable[OF X] distributed_AE[OF X] 

646 
interpret X: prob_space "distr M MX X" 

647 
using distributed_measurable[OF X] by (rule prob_space_distr) 

648 

649 
have "  log b (measure MX {x \<in> space MX. Px x \<noteq> 0}) = 

650 
 log b (\<integral> x. indicator {x \<in> space MX. Px x \<noteq> 0} x \<partial>MX)" 

651 
using Px fin 

652 
by (subst integral_indicator) (auto simp: measure_def borel_measurable_ereal_iff) 

653 
also have " log b (\<integral> x. indicator {x \<in> space MX. Px x \<noteq> 0} x \<partial>MX) =  log b (\<integral> x. 1 / Px x \<partial>distr M MX X)" 

654 
unfolding distributed_distr_eq_density[OF X] using Px 

655 
apply (intro arg_cong[where f="log b"] arg_cong[where f=uminus]) 

656 
by (subst integral_density) (auto simp: borel_measurable_ereal_iff intro!: integral_cong) 

657 
also have "\<dots> \<le> (\<integral> x.  log b (1 / Px x) \<partial>distr M MX X)" 

658 
proof (rule X.jensens_inequality[of "\<lambda>x. 1 / Px x" "{0<..}" 0 1 "\<lambda>x.  log b x"]) 

659 
show "AE x in distr M MX X. 1 / Px x \<in> {0<..}" 

660 
unfolding distributed_distr_eq_density[OF X] 

661 
using Px by (auto simp: AE_density) 

662 
have [simp]: "\<And>x. x \<in> space MX \<Longrightarrow> ereal (if Px x = 0 then 0 else 1) = indicator {x \<in> space MX. Px x \<noteq> 0} x" 

663 
by (auto simp: one_ereal_def) 

664 
have "(\<integral>\<^isup>+ x. max 0 (ereal ( (if Px x = 0 then 0 else 1))) \<partial>MX) = (\<integral>\<^isup>+ x. 0 \<partial>MX)" 

665 
by (intro positive_integral_cong) (auto split: split_max) 

666 
then show "integrable (distr M MX X) (\<lambda>x. 1 / Px x)" 

667 
unfolding distributed_distr_eq_density[OF X] using Px 

668 
by (auto simp: positive_integral_density integrable_def borel_measurable_ereal_iff fin positive_integral_max_0 

669 
cong: positive_integral_cong) 

670 
have "integrable MX (\<lambda>x. Px x * log b (1 / Px x)) = 

671 
integrable MX (\<lambda>x.  Px x * log b (Px x))" 

672 
using Px 

673 
by (intro integrable_cong_AE) 

674 
(auto simp: borel_measurable_ereal_iff log_divide_eq 

675 
intro!: measurable_If) 

676 
then show "integrable (distr M MX X) (\<lambda>x.  log b (1 / Px x))" 

677 
unfolding distributed_distr_eq_density[OF X] 

678 
using Px int 

679 
by (subst integral_density) (auto simp: borel_measurable_ereal_iff) 

680 
qed (auto simp: minus_log_convex[OF b_gt_1]) 

681 
also have "\<dots> = (\<integral> x. log b (Px x) \<partial>distr M MX X)" 

682 
unfolding distributed_distr_eq_density[OF X] using Px 

683 
by (intro integral_cong_AE) (auto simp: AE_density log_divide_eq) 

684 
also have "\<dots> =  entropy b MX X" 

685 
unfolding distributed_distr_eq_density[OF X] using Px 

686 
by (subst entropy_distr[OF X]) (auto simp: borel_measurable_ereal_iff integral_density) 

687 
finally show ?thesis 

688 
by simp 

689 
qed 

690 

691 
lemma (in information_space) entropy_le_space: 

692 
fixes Px :: "'b \<Rightarrow> real" and MX :: "'b measure" 

693 
assumes X: "distributed M MX X Px" 

694 
and fin: "finite_measure MX" 

695 
and int: "integrable MX (\<lambda>x.  Px x * log b (Px x))" 

696 
shows "entropy b MX X \<le> log b (measure MX (space MX))" 

697 
proof  

698 
note Px = distributed_borel_measurable[OF X] distributed_AE[OF X] 

699 
interpret finite_measure MX by fact 

700 
have "entropy b MX X \<le> log b (measure MX {x \<in> space MX. Px x \<noteq> 0})" 

701 
using int X by (intro entropy_le) auto 

702 
also have "\<dots> \<le> log b (measure MX (space MX))" 

703 
using Px distributed_imp_emeasure_nonzero[OF X] 

704 
by (intro log_le) 

705 
(auto intro!: borel_measurable_ereal_iff finite_measure_mono b_gt_1 

706 
less_le[THEN iffD2] measure_nonneg simp: emeasure_eq_measure) 

707 
finally show ?thesis . 

708 
qed 

709 

49785  710 
lemma (in prob_space) uniform_distributed_params: 
711 
assumes X: "distributed M MX X (\<lambda>x. indicator A x / measure MX A)" 

712 
shows "A \<in> sets MX" "measure MX A \<noteq> 0" 

47694  713 
proof  
49785  714 
interpret X: prob_space "distr M MX X" 
715 
using distributed_measurable[OF X] by (rule prob_space_distr) 

716 

717 
show "measure MX A \<noteq> 0" 

718 
proof 

719 
assume "measure MX A = 0" 

720 
with X.emeasure_space_1 X.prob_space distributed_distr_eq_density[OF X] 

721 
show False 

722 
by (simp add: emeasure_density zero_ereal_def[symmetric]) 

723 
qed 

724 
with measure_notin_sets[of A MX] show "A \<in> sets MX" 

725 
by blast 

39097  726 
qed 
36624  727 

47694  728 
lemma (in information_space) entropy_uniform: 
49785  729 
assumes X: "distributed M MX X (\<lambda>x. indicator A x / measure MX A)" (is "distributed _ _ _ ?f") 
47694  730 
shows "entropy b MX X = log b (measure MX A)" 
49785  731 
proof (subst entropy_distr[OF X]) 
732 
have [simp]: "emeasure MX A \<noteq> \<infinity>" 

733 
using uniform_distributed_params[OF X] by (auto simp add: measure_def) 

734 
have eq: "(\<integral> x. indicator A x / measure MX A * log b (indicator A x / measure MX A) \<partial>MX) = 

735 
(\<integral> x. ( log b (measure MX A) / measure MX A) * indicator A x \<partial>MX)" 

736 
using measure_nonneg[of MX A] uniform_distributed_params[OF X] 

737 
by (auto intro!: integral_cong split: split_indicator simp: log_divide_eq) 

738 
show " (\<integral> x. indicator A x / measure MX A * log b (indicator A x / measure MX A) \<partial>MX) = 

739 
log b (measure MX A)" 

740 
unfolding eq using uniform_distributed_params[OF X] 

741 
by (subst lebesgue_integral_cmult) (auto simp: measure_def) 

742 
qed 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

743 

47694  744 
lemma (in information_space) entropy_simple_distributed: 
49786  745 
"simple_distributed M X f \<Longrightarrow> \<H>(X) =  (\<Sum>x\<in>X`space M. f x * log b (f x))" 
746 
by (subst entropy_distr[OF simple_distributed]) 

747 
(auto simp add: lebesgue_integral_count_space_finite) 

39097  748 

40859  749 
lemma (in information_space) entropy_le_card_not_0: 
47694  750 
assumes X: "simple_distributed M X f" 
751 
shows "\<H>(X) \<le> log b (card (X ` space M \<inter> {x. f x \<noteq> 0}))" 

39097  752 
proof  
49787
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

753 
let ?X = "count_space (X`space M)" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

754 
have "\<H>(X) \<le> log b (measure ?X {x \<in> space ?X. f x \<noteq> 0})" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

755 
by (rule entropy_le[OF simple_distributed[OF X]]) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

756 
(simp_all add: simple_distributed_finite[OF X] subset_eq integrable_count_space emeasure_count_space) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

757 
also have "measure ?X {x \<in> space ?X. f x \<noteq> 0} = card (X ` space M \<inter> {x. f x \<noteq> 0})" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

758 
by (simp_all add: simple_distributed_finite[OF X] subset_eq emeasure_count_space measure_def Int_def) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

759 
finally show ?thesis . 
39097  760 
qed 
761 

40859  762 
lemma (in information_space) entropy_le_card: 
49787
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

763 
assumes X: "simple_distributed M X f" 
40859  764 
shows "\<H>(X) \<le> log b (real (card (X ` space M)))" 
49787
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

765 
proof  
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

766 
let ?X = "count_space (X`space M)" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

767 
have "\<H>(X) \<le> log b (measure ?X (space ?X))" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

768 
by (rule entropy_le_space[OF simple_distributed[OF X]]) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

769 
(simp_all add: simple_distributed_finite[OF X] subset_eq integrable_count_space emeasure_count_space finite_measure_count_space) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

770 
also have "measure ?X (space ?X) = card (X ` space M)" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

771 
by (simp_all add: simple_distributed_finite[OF X] subset_eq emeasure_count_space measure_def) 
39097  772 
finally show ?thesis . 
773 
qed 

774 

775 
subsection {* Conditional Mutual Information *} 

776 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

777 
definition (in prob_space) 
41689
3e39b0e730d6
the measure valuation is again part of the measure_space type, instead of an explicit parameter to the locale;
hoelzl
parents:
41661
diff
changeset

778 
"conditional_mutual_information b MX MY MZ X Y Z \<equiv> 
3e39b0e730d6
the measure valuation is again part of the measure_space type, instead of an explicit parameter to the locale;
hoelzl
parents:
41661
diff
changeset

779 
mutual_information b MX (MY \<Otimes>\<^isub>M MZ) X (\<lambda>x. (Y x, Z x))  
3e39b0e730d6
the measure valuation is again part of the measure_space type, instead of an explicit parameter to the locale;
hoelzl
parents:
41661
diff
changeset

780 
mutual_information b MX MZ X Z" 
36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

781 

40859  782 
abbreviation (in information_space) 
783 
conditional_mutual_information_Pow ("\<I>'( _ ; _  _ ')") where 

36624  784 
"\<I>(X ; Y  Z) \<equiv> conditional_mutual_information b 
47694  785 
(count_space (X ` space M)) (count_space (Y ` space M)) (count_space (Z ` space M)) X Y Z" 
36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

786 

49787
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

787 
lemma (in pair_sigma_finite) borel_measurable_positive_integral_fst: 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

788 
"(\<lambda>(x, y). f x y) \<in> borel_measurable (M1 \<Otimes>\<^isub>M M2) \<Longrightarrow> (\<lambda>x. \<integral>\<^isup>+ y. f x y \<partial>M2) \<in> borel_measurable M1" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

789 
using positive_integral_fst_measurable(1)[of "\<lambda>(x, y). f x y"] by simp 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

790 

d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

791 
lemma (in pair_sigma_finite) borel_measurable_positive_integral_snd: 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

792 
assumes "(\<lambda>(x, y). f x y) \<in> borel_measurable (M2 \<Otimes>\<^isub>M M1)" shows "(\<lambda>x. \<integral>\<^isup>+ y. f x y \<partial>M1) \<in> borel_measurable M2" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

793 
proof  
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

794 
interpret Q: pair_sigma_finite M2 M1 by default 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

795 
from Q.borel_measurable_positive_integral_fst assms show ?thesis by simp 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

796 
qed 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

797 

d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

798 
lemma (in information_space) 
47694  799 
assumes S: "sigma_finite_measure S" and T: "sigma_finite_measure T" and P: "sigma_finite_measure P" 
800 
assumes Px: "distributed M S X Px" 

801 
assumes Pz: "distributed M P Z Pz" 

802 
assumes Pyz: "distributed M (T \<Otimes>\<^isub>M P) (\<lambda>x. (Y x, Z x)) Pyz" 

803 
assumes Pxz: "distributed M (S \<Otimes>\<^isub>M P) (\<lambda>x. (X x, Z x)) Pxz" 

804 
assumes Pxyz: "distributed M (S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P) (\<lambda>x. (X x, Y x, Z x)) Pxyz" 

805 
assumes I1: "integrable (S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P) (\<lambda>(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Px x * Pyz (y, z))))" 

806 
assumes I2: "integrable (S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P) (\<lambda>(x, y, z). Pxyz (x, y, z) * log b (Pxz (x, z) / (Px x * Pz z)))" 

49787
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

807 
shows conditional_mutual_information_generic_eq: "conditional_mutual_information b S T P X Y Z 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

808 
= (\<integral>(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Pxz (x, z) * (Pyz (y,z) / Pz z))) \<partial>(S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P))" (is "?eq") 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

809 
and conditional_mutual_information_generic_nonneg: "0 \<le> conditional_mutual_information b S T P X Y Z" (is "?nonneg") 
40859  810 
proof  
47694  811 
interpret S: sigma_finite_measure S by fact 
812 
interpret T: sigma_finite_measure T by fact 

813 
interpret P: sigma_finite_measure P by fact 

814 
interpret TP: pair_sigma_finite T P .. 

815 
interpret SP: pair_sigma_finite S P .. 

49787
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

816 
interpret ST: pair_sigma_finite S T .. 
47694  817 
interpret SPT: pair_sigma_finite "S \<Otimes>\<^isub>M P" T .. 
818 
interpret STP: pair_sigma_finite S "T \<Otimes>\<^isub>M P" .. 

49787
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

819 
interpret TPS: pair_sigma_finite "T \<Otimes>\<^isub>M P" S .. 
47694  820 
have TP: "sigma_finite_measure (T \<Otimes>\<^isub>M P)" .. 
821 
have SP: "sigma_finite_measure (S \<Otimes>\<^isub>M P)" .. 

822 
have YZ: "random_variable (T \<Otimes>\<^isub>M P) (\<lambda>x. (Y x, Z x))" 

823 
using Pyz by (simp add: distributed_measurable) 

824 

825 
have Pxyz_f: "\<And>M f. f \<in> measurable M (S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P) \<Longrightarrow> (\<lambda>x. Pxyz (f x)) \<in> borel_measurable M" 

826 
using measurable_comp[OF _ Pxyz[THEN distributed_real_measurable]] by (auto simp: comp_def) 

827 

828 
{ fix f g h M 

829 
assume f: "f \<in> measurable M S" and g: "g \<in> measurable M P" and h: "h \<in> measurable M (S \<Otimes>\<^isub>M P)" 

830 
from measurable_comp[OF h Pxz[THEN distributed_real_measurable]] 

831 
measurable_comp[OF f Px[THEN distributed_real_measurable]] 

832 
measurable_comp[OF g Pz[THEN distributed_real_measurable]] 

833 
have "(\<lambda>x. log b (Pxz (h x) / (Px (f x) * Pz (g x)))) \<in> borel_measurable M" 

834 
by (simp add: comp_def b_gt_1) } 

835 
note borel_log = this 

836 

837 
have measurable_cut: "(\<lambda>(x, y, z). (x, z)) \<in> measurable (S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P) (S \<Otimes>\<^isub>M P)" 

838 
by (auto simp add: split_beta' comp_def intro!: measurable_Pair measurable_snd') 

839 

840 
from Pxz Pxyz have distr_eq: "distr M (S \<Otimes>\<^isub>M P) (\<lambda>x. (X x, Z x)) = 

841 
distr (distr M (S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P) (\<lambda>x. (X x, Y x, Z x))) (S \<Otimes>\<^isub>M P) (\<lambda>(x, y, z). (x, z))" 

842 
by (subst distr_distr[OF measurable_cut]) (auto dest: distributed_measurable simp: comp_def) 

40859  843 

47694  844 
have "mutual_information b S P X Z = 
845 
(\<integral>x. Pxz x * log b (Pxz x / (Px (fst x) * Pz (snd x))) \<partial>(S \<Otimes>\<^isub>M P))" 

846 
by (rule mutual_information_distr[OF S P Px Pz Pxz]) 

847 
also have "\<dots> = (\<integral>(x,y,z). Pxyz (x,y,z) * log b (Pxz (x,z) / (Px x * Pz z)) \<partial>(S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P))" 

848 
using b_gt_1 Pxz Px Pz 

849 
by (subst distributed_transform_integral[OF Pxyz Pxz, where T="\<lambda>(x, y, z). (x, z)"]) 

850 
(auto simp: split_beta' intro!: measurable_Pair measurable_snd' measurable_snd'' measurable_fst'' borel_measurable_times 

851 
dest!: distributed_real_measurable) 

852 
finally have mi_eq: 

853 
"mutual_information b S P X Z = (\<integral>(x,y,z). Pxyz (x,y,z) * log b (Pxz (x,z) / (Px x * Pz z)) \<partial>(S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P))" . 

854 

49787
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

855 
have ae1: "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. Px (fst x) = 0 \<longrightarrow> Pxyz x = 0" 
47694  856 
by (intro subdensity_real[of fst, OF _ Pxyz Px]) auto 
49787
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

857 
moreover have ae2: "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. Pz (snd (snd x)) = 0 \<longrightarrow> Pxyz x = 0" 
47694  858 
by (intro subdensity_real[of "\<lambda>x. snd (snd x)", OF _ Pxyz Pz]) (auto intro: measurable_snd') 
49787
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

859 
moreover have ae3: "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. Pxz (fst x, snd (snd x)) = 0 \<longrightarrow> Pxyz x = 0" 
47694  860 
by (intro subdensity_real[of "\<lambda>x. (fst x, snd (snd x))", OF _ Pxyz Pxz]) (auto intro: measurable_Pair measurable_snd') 
49787
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

861 
moreover have ae4: "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. Pyz (snd x) = 0 \<longrightarrow> Pxyz x = 0" 
47694  862 
by (intro subdensity_real[of snd, OF _ Pxyz Pyz]) (auto intro: measurable_Pair) 
49787
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

863 
moreover have ae5: "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. 0 \<le> Px (fst x)" 
47694  864 
using Px by (intro STP.AE_pair_measure) (auto simp: comp_def intro!: measurable_fst'' dest: distributed_real_AE distributed_real_measurable) 
49787
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

865 
moreover have ae6: "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. 0 \<le> Pyz (snd x)" 
47694  866 
using Pyz by (intro STP.AE_pair_measure) (auto simp: comp_def intro!: measurable_snd'' dest: distributed_real_AE distributed_real_measurable) 
49787
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

867 
moreover have ae7: "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. 0 \<le> Pz (snd (snd x))" 
47694  868 
using Pz Pz[THEN distributed_real_measurable] by (auto intro!: measurable_snd'' TP.AE_pair_measure STP.AE_pair_measure AE_I2[of S] dest: distributed_real_AE) 
49787
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

869 
moreover have ae8: "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. 0 \<le> Pxz (fst x, snd (snd x))" 
47694  870 
using Pxz[THEN distributed_real_AE, THEN SP.AE_pair] 
871 
using measurable_comp[OF measurable_Pair[OF measurable_fst measurable_comp[OF measurable_snd measurable_snd]] Pxz[THEN distributed_real_measurable], of T] 

872 
using measurable_comp[OF measurable_snd measurable_Pair2[OF Pxz[THEN distributed_real_measurable]], of _ T] 

873 
by (auto intro!: TP.AE_pair_measure STP.AE_pair_measure simp: comp_def) 

874 
moreover note Pxyz[THEN distributed_real_AE] 

49787
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

875 
ultimately have ae: "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. 
47694  876 
Pxyz x * log b (Pxyz x / (Px (fst x) * Pyz (snd x)))  
877 
Pxyz x * log b (Pxz (fst x, snd (snd x)) / (Px (fst x) * Pz (snd (snd x)))) = 

878 
Pxyz x * log b (Pxyz x * Pz (snd (snd x)) / (Pxz (fst x, snd (snd x)) * Pyz (snd x))) " 

879 
proof eventually_elim 

880 
case (goal1 x) 

881 
show ?case 

40859  882 
proof cases 
47694  883 
assume "Pxyz x \<noteq> 0" 
884 
with goal1 have "0 < Px (fst x)" "0 < Pz (snd (snd x))" "0 < Pxz (fst x, snd (snd x))" "0 < Pyz (snd x)" "0 < Pxyz x" 

885 
by auto 

886 
then show ?thesis 

887 
using b_gt_1 by (simp add: log_simps mult_pos_pos less_imp_le field_simps) 

40859  888 
qed simp 
889 
qed 

49787
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

890 
with I1 I2 show ?eq 
40859  891 
unfolding conditional_mutual_information_def 
47694  892 
apply (subst mi_eq) 
893 
apply (subst mutual_information_distr[OF S TP Px Pyz Pxyz]) 

894 
apply (subst integral_diff(2)[symmetric]) 

895 
apply (auto intro!: integral_cong_AE simp: split_beta' simp del: integral_diff) 

896 
done 

49787
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

897 

d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

898 
let ?P = "density (S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P) Pxyz" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

899 
interpret P: prob_space ?P 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

900 
unfolding distributed_distr_eq_density[OF Pxyz, symmetric] 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

901 
using distributed_measurable[OF Pxyz] by (rule prob_space_distr) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

902 

d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

903 
let ?Q = "density (T \<Otimes>\<^isub>M P) Pyz" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

904 
interpret Q: prob_space ?Q 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

905 
unfolding distributed_distr_eq_density[OF Pyz, symmetric] 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

906 
using distributed_measurable[OF Pyz] by (rule prob_space_distr) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

907 

d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

908 
let ?f = "\<lambda>(x, y, z). Pxz (x, z) * (Pyz (y, z) / Pz z) / Pxyz (x, y, z)" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

909 

d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

910 
from subdensity_real[of snd, OF _ Pyz Pz] 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

911 
have aeX1: "AE x in T \<Otimes>\<^isub>M P. Pz (snd x) = 0 \<longrightarrow> Pyz x = 0" by (auto simp: comp_def) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

912 
have aeX2: "AE x in T \<Otimes>\<^isub>M P. 0 \<le> Pz (snd x)" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

913 
using Pz by (intro TP.AE_pair_measure) (auto simp: comp_def intro!: measurable_snd'' dest: distributed_real_AE distributed_real_measurable) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

914 

d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

915 
have aeX3: "AE y in T \<Otimes>\<^isub>M P. (\<integral>\<^isup>+ x. ereal (Pxz (x, snd y)) \<partial>S) = ereal (Pz (snd y))" 
49788
3c10763f5cb4
show and use distributed_swap and distributed_jointI
hoelzl
parents:
49787
diff
changeset

916 
using Pz distributed_marginal_eq_joint2[OF P S Pz Pxz] 
49787
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

917 
apply (intro TP.AE_pair_measure) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

918 
apply (auto simp: comp_def measurable_split_conv 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

919 
intro!: measurable_snd'' borel_measurable_ereal_eq borel_measurable_ereal 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

920 
SP.borel_measurable_positive_integral_snd measurable_compose[OF _ Pxz[THEN distributed_real_measurable]] 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

921 
measurable_Pair 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

922 
dest: distributed_real_AE distributed_real_measurable) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

923 
done 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

924 

d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

925 
note M = borel_measurable_divide borel_measurable_diff borel_measurable_times borel_measurable_ereal 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

926 
measurable_compose[OF _ measurable_snd] 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

927 
measurable_Pair 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

928 
measurable_compose[OF _ Pxyz[THEN distributed_real_measurable]] 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

929 
measurable_compose[OF _ Pxz[THEN distributed_real_measurable]] 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

930 
measurable_compose[OF _ Pyz[THEN distributed_real_measurable]] 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

931 
measurable_compose[OF _ Pz[THEN distributed_real_measurable]] 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

932 
measurable_compose[OF _ Px[THEN distributed_real_measurable]] 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

933 
STP.borel_measurable_positive_integral_snd 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

934 
have "(\<integral>\<^isup>+ x. ?f x \<partial>?P) \<le> (\<integral>\<^isup>+ (x, y, z). Pxz (x, z) * (Pyz (y, z) / Pz z) \<partial>(S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P))" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

935 
apply (subst positive_integral_density) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

936 
apply (rule distributed_borel_measurable[OF Pxyz]) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

937 
apply (rule distributed_AE[OF Pxyz]) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

938 
apply (auto simp add: borel_measurable_ereal_iff split_beta' intro!: M) [] 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

939 
apply (rule positive_integral_mono_AE) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

940 
using ae5 ae6 ae7 ae8 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

941 
apply eventually_elim 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

942 
apply (auto intro!: divide_nonneg_nonneg mult_nonneg_nonneg) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

943 
done 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

944 
also have "\<dots> = (\<integral>\<^isup>+(y, z). \<integral>\<^isup>+ x. ereal (Pxz (x, z)) * ereal (Pyz (y, z) / Pz z) \<partial>S \<partial>T \<Otimes>\<^isub>M P)" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

945 
by (subst STP.positive_integral_snd_measurable[symmetric]) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

946 
(auto simp add: borel_measurable_ereal_iff split_beta' intro!: M) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

947 
also have "\<dots> = (\<integral>\<^isup>+x. ereal (Pyz x) * 1 \<partial>T \<Otimes>\<^isub>M P)" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

948 
apply (rule positive_integral_cong_AE) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

949 
using aeX1 aeX2 aeX3 distributed_AE[OF Pyz] AE_space 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

950 
apply eventually_elim 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

951 
proof (case_tac x, simp del: times_ereal.simps add: space_pair_measure) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

952 
fix a b assume "Pz b = 0 \<longrightarrow> Pyz (a, b) = 0" "0 \<le> Pz b" "a \<in> space T \<and> b \<in> space P" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

953 
"(\<integral>\<^isup>+ x. ereal (Pxz (x, b)) \<partial>S) = ereal (Pz b)" "0 \<le> Pyz (a, b)" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

954 
then show "(\<integral>\<^isup>+ x. ereal (Pxz (x, b)) * ereal (Pyz (a, b) / Pz b) \<partial>S) = ereal (Pyz (a, b))" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

955 
apply (subst positive_integral_multc) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

956 
apply (auto intro!: borel_measurable_ereal divide_nonneg_nonneg 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

957 
measurable_compose[OF _ Pxz[THEN distributed_real_measurable]] measurable_Pair 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

958 
split: prod.split) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

959 
done 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

960 
qed 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

961 
also have "\<dots> = 1" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

962 
using Q.emeasure_space_1 distributed_AE[OF Pyz] distributed_distr_eq_density[OF Pyz] 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

963 
by (subst positive_integral_density[symmetric]) (auto intro!: M) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

964 
finally have le1: "(\<integral>\<^isup>+ x. ?f x \<partial>?P) \<le> 1" . 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

965 
also have "\<dots> < \<infinity>" by simp 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

966 
finally have fin: "(\<integral>\<^isup>+ x. ?f x \<partial>?P) \<noteq> \<infinity>" by simp 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

967 

d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

968 
have pos: "(\<integral>\<^isup>+ x. ?f x \<partial>?P) \<noteq> 0" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

969 
apply (subst positive_integral_density) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

970 
apply (rule distributed_borel_measurable[OF Pxyz]) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

971 
apply (rule distributed_AE[OF Pxyz]) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

972 
apply (auto simp add: borel_measurable_ereal_iff split_beta' intro!: M) [] 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

973 
apply (simp add: split_beta') 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

974 
proof 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

975 
let ?g = "\<lambda>x. ereal (if Pxyz x = 0 then 0 else Pxz (fst x, snd (snd x)) * Pyz (snd x) / Pz (snd (snd x)))" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

976 
assume "(\<integral>\<^isup>+ x. ?g x \<partial>(S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P)) = 0" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

977 
then have "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. ?g x \<le> 0" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

978 
by (intro positive_integral_0_iff_AE[THEN iffD1]) (auto intro!: M borel_measurable_ereal measurable_If) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

979 
then have "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. Pxyz x = 0" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

980 
using ae1 ae2 ae3 ae4 ae5 ae6 ae7 ae8 Pxyz[THEN distributed_real_AE] 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

981 
by eventually_elim (auto split: split_if_asm simp: mult_le_0_iff divide_le_0_iff) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

982 
then have "(\<integral>\<^isup>+ x. ereal (Pxyz x) \<partial>S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P) = 0" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

983 
by (subst positive_integral_cong_AE[of _ "\<lambda>x. 0"]) auto 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

984 
with P.emeasure_space_1 show False 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

985 
by (subst (asm) emeasure_density) (auto intro!: M cong: positive_integral_cong) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

986 
qed 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

987 

d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

988 
have neg: "(\<integral>\<^isup>+ x.  ?f x \<partial>?P) = 0" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

989 
apply (rule positive_integral_0_iff_AE[THEN iffD2]) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

990 
apply (auto intro!: M simp: split_beta') [] 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

991 
apply (subst AE_density) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

992 
apply (auto intro!: M simp: split_beta') [] 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

993 
using ae5 ae6 ae7 ae8 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

994 
apply eventually_elim 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

995 
apply (auto intro!: mult_nonneg_nonneg divide_nonneg_nonneg) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

996 
done 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

997 

d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

998 
have I3: "integrable (S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P) (\<lambda>(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Pxz (x, z) * (Pyz (y,z) / Pz z))))" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

999 
apply (rule integrable_cong_AE[THEN iffD1, OF _ _ _ integral_diff(1)[OF I1 I2]]) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1000 
using ae 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1001 
apply (auto intro!: M simp: split_beta') 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1002 
done 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1003 

d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1004 
have " log b 1 \<le>  log b (integral\<^isup>L ?P ?f)" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1005 
proof (intro le_imp_neg_le log_le[OF b_gt_1]) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1006 
show "0 < integral\<^isup>L ?P ?f" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1007 
using neg pos fin positive_integral_positive[of ?P ?f] 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1008 
by (cases "(\<integral>\<^isup>+ x. ?f x \<partial>?P)") (auto simp add: lebesgue_integral_def less_le split_beta') 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1009 
show "integral\<^isup>L ?P ?f \<le> 1" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1010 
using neg le1 fin positive_integral_positive[of ?P ?f] 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1011 
by (cases "(\<integral>\<^isup>+ x. ?f x \<partial>?P)") (auto simp add: lebesgue_integral_def split_beta' one_ereal_def) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1012 
qed 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1013 
also have " log b (integral\<^isup>L ?P ?f) \<le> (\<integral> x.  log b (?f x) \<partial>?P)" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1014 
proof (rule P.jensens_inequality[where a=0 and b=1 and I="{0<..}"]) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1015 
show "AE x in ?P. ?f x \<in> {0<..}" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1016 
unfolding AE_density[OF distributed_borel_measurable[OF Pxyz]] 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1017 
using ae1 ae2 ae3 ae4 ae5 ae6 ae7 ae8 Pxyz[THEN distributed_real_AE] 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1018 
by eventually_elim (auto simp: divide_pos_pos mult_pos_pos) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1019 
show "integrable ?P ?f" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1020 
unfolding integrable_def 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1021 
using fin neg by (auto intro!: M simp: split_beta') 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1022 
show "integrable ?P (\<lambda>x.  log b (?f x))" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1023 
apply (subst integral_density) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1024 
apply (auto intro!: M) [] 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1025 
apply (auto intro!: M distributed_real_AE[OF Pxyz]) [] 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1026 
apply (auto intro!: M borel_measurable_uminus borel_measurable_log simp: split_beta') [] 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1027 
apply (rule integrable_cong_AE[THEN iffD1, OF _ _ _ I3]) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1028 
apply (auto intro!: M borel_measurable_uminus borel_measurable_log simp: split_beta') [] 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1029 
apply (auto intro!: M borel_measurable_uminus borel_measurable_log simp: split_beta') [] 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1030 
using ae1 ae2 ae3 ae4 ae5 ae6 ae7 ae8 Pxyz[THEN distributed_real_AE] 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1031 
apply eventually_elim 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1032 
apply (auto simp: log_divide_eq log_mult_eq zero_le_mult_iff zero_less_mult_iff zero_less_divide_iff field_simps) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1033 
done 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1034 
qed (auto simp: b_gt_1 minus_log_convex) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1035 
also have "\<dots> = conditional_mutual_information b S T P X Y Z" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1036 
unfolding `?eq` 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1037 
apply (subst integral_density) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1038 
apply (auto intro!: M) [] 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1039 
apply (auto intro!: M distributed_real_AE[OF Pxyz]) [] 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1040 
apply (auto intro!: M borel_measurable_uminus borel_measurable_log simp: split_beta') [] 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1041 
apply (intro integral_cong_AE) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1042 
using ae1 ae2 ae3 ae4 ae5 ae6 ae7 ae8 Pxyz[THEN distributed_real_AE] 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1043 
apply eventually_elim 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1044 
apply (auto simp: log_divide_eq zero_less_mult_iff zero_less_divide_iff field_simps) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1045 
done 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1046 
finally show ?nonneg 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1047 
by simp 
40859  1048 
qed 
1049 

1050 
lemma (in information_space) conditional_mutual_information_eq: 

47694  1051 
assumes Pz: "simple_distributed M Z Pz" 
1052 
assumes Pyz: "simple_distributed M (\<lambda>x. (Y x, Z x)) Pyz" 

1053 
assumes Pxz: "simple_distributed M (\<lambda>x. (X x, Z x)) Pxz" 

1054 
assumes Pxyz: "simple_distributed M (\<lambda>x. (X x, Y x, Z x)) Pxyz" 

1055 
shows "\<I>(X ; Y  Z) = 

1056 
(\<Sum>(x, y, z)\<in>(\<lambda>x. (X x, Y x, Z x))`space M. Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Pxz (x, z) * (Pyz (y,z) / Pz z))))" 

1057 
proof (subst conditional_mutual_information_generic_eq[OF _ _ _ _ 

1058 
simple_distributed[OF Pz] simple_distributed_joint[OF Pyz] simple_distributed_joint[OF Pxz] 

1059 
simple_distributed_joint2[OF Pxyz]]) 

1060 
note simple_distributed_joint2_finite[OF Pxyz, simp] 

1061 
show "sigma_finite_measure (count_space (X ` space M))" 

1062 
by (simp add: sigma_finite_measure_count_space_finite) 

1063 
show "sigma_finite_measure (count_space (Y ` space M))" 

1064 
by (simp add: sigma_finite_measure_count_space_finite) 

1065 
show "sigma_finite_measure (count_space (Z ` space M))" 

1066 
by (simp add: sigma_finite_measure_count_space_finite) 

1067 
have "count_space (X ` space M) \<Otimes>\<^isub>M count_space (Y ` space M) \<Otimes>\<^isub>M count_space (Z ` space M) = 

1068 
count_space (X`space M \<times> Y`space M \<times> Z`space M)" 

1069 
(is "?P = ?C") 

1070 
by (simp add: pair_measure_count_space) 

40859  1071 

47694  1072 
let ?Px = "\<lambda>x. measure M (X ` {x} \<inter> space M)" 
1073 
have "(\<lambda>x. (X x, Z x)) \<in> measurable M (count_space (X ` space M) \<Otimes>\<^isub>M count_space (Z ` space M))" 

1074 
using simple_distributed_joint[OF Pxz] by (rule distributed_measurable) 

1075 
from measurable_comp[OF this measurable_fst] 

1076 
have "random_variable (count_space (X ` space M)) X" 

1077 
by (simp add: comp_def) 

1078 
then have "simple_function M X" 

1079 
unfolding simple_function_def by auto 

1080 
then have "simple_distributed M X ?Px" 

1081 
by (rule simple_distributedI) auto 

1082 
then show "distributed M (count_space (X ` space M)) X ?Px" 

1083 
by (rule simple_distributed) 

1084 

1085 
let ?f = "(\<lambda>x. if x \<in> (\<lambda>x. (X x, Y x, Z x)) ` space M then Pxyz x else 0)" 

1086 
let ?g = "(\<lambda>x. if x \<in> (\<lambda>x. (Y x, Z x)) ` space M then Pyz x else 0)" 

1087 
let ?h = "(\<lambda>x. if x \<in> (\<lambda>x. (X x, Z x)) ` space M then Pxz x else 0)" 

1088 
show 

1089 
"integrable ?P (\<lambda>(x, y, z). ?f (x, y, z) * log b (?f (x, y, z) / (?Px x * ?g (y, z))))" 

1090 
"integrable ?P (\<lambda>(x, y, z). ?f (x, y, z) * log b (?h (x, z) / (?Px x * Pz z)))" 

1091 
by (auto intro!: integrable_count_space simp: pair_measure_count_space) 

1092 
let ?i = "\<lambda>x y z. ?f (x, y, z) * log b (?f (x, y, z) / (?h (x, z) * (?g (y, z) / Pz z)))" 

1093 
let ?j = "\<lambda>x y z. Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Pxz (x, z) * (Pyz (y,z) / Pz z)))" 

1094 
have "(\<lambda>(x, y, z). ?i x y z) = (\<lambda>x. if x \<in> (\<lambda>x. (X x, Y x, Z x)) ` space M then ?j (fst x) (fst (snd x)) (snd (snd x)) else 0)" 

1095 
by (auto intro!: ext) 

1096 
then show "(\<integral> (x, y, z). ?i x y z \<partial>?P) = (\<Sum>(x, y, z)\<in>(\<lambda>x. (X x, Y x, Z x)) ` space M. ?j x y z)" 

1097 
by (auto intro!: setsum_cong simp add: `?P = ?C` lebesgue_integral_count_space_finite simple_distributed_finite setsum_cases split_beta') 

36624  1098 
qed 
1099 

47694  1100 
lemma (in information_space) conditional_mutual_information_nonneg: 
1101 
assumes X: "simple_function M X" and Y: "simple_function M Y" and Z: "simple_function M Z" 

1102 
shows "0 \<le> \<I>(X ; Y  Z)" 

1103 
proof  

49787
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1104 
have [simp]: "count_space (X ` space M) \<Otimes>\<^isub>M count_space (Y ` space M) \<Otimes>\<^isub>M count_space (Z ` space M) = 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1105 
count_space (X`space M \<times> Y`space M \<times> Z`space M)" 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1106 
by (simp add: pair_measure_count_space X Y Z simple_functionD) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1107 
note sf = sigma_finite_measure_count_space_finite[OF simple_functionD(1)] 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1108 
note sd = simple_distributedI[OF _ refl] 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1109 
note sp = simple_function_Pair 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1110 
show ?thesis 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1111 
apply (rule conditional_mutual_information_generic_nonneg[OF sf[OF X] sf[OF Y] sf[OF Z]]) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1112 
apply (rule simple_distributed[OF sd[OF X]]) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1113 
apply (rule simple_distributed[OF sd[OF Z]]) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1114 
apply (rule simple_distributed_joint[OF sd[OF sp[OF Y Z]]]) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1115 
apply (rule simple_distributed_joint[OF sd[OF sp[OF X Z]]]) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1116 
apply (rule simple_distributed_joint2[OF sd[OF sp[OF X sp[OF Y Z]]]]) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1117 
apply (auto intro!: integrable_count_space simp: X Y Z simple_functionD) 
d8de705b48d4
rule to show that conditional mutual information is nonnegative in the continuous case
hoelzl
parents:
49786
diff
changeset

1118 
done 
36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

1119 
qed 
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

1120 

39097  1121 
subsection {* Conditional Entropy *} 
1122 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

1123 
definition (in prob_space) 
49791  1124 
"conditional_entropy b S T X Y =  (\<integral>(x, y). log b (real (RN_deriv (S \<Otimes>\<^isub>M T) (distr M (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x))) (x, y)) / 
1125 
real (RN_deriv T (distr M T Y) y)) \<partial>distr M (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x)))" 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

1126 

40859  1127 
abbreviation (in information_space) 
1128 
conditional_entropy_Pow ("\<H>'(_  _')") where 

47694  1129 
"\<H>(X  Y) \<equiv> conditional_entropy b (count_space (X`space M)) (count_space (Y`space M)) X Y" 
36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

1130 

49791  1131 
lemma (in information_space) conditional_entropy_generic_eq: 
1132 
fixes Px :: "'b \<Rightarrow> real" and Py :: "'c \<Rightarrow> real" 

1133 
assumes S: "sigma_finite_measure S" and T: "sigma_finite_measure T" 

1134 
assumes Py: "distributed M T Y Py" 

1135 
assumes Pxy: "distributed M (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x)) Pxy" 

1136 
shows "conditional_entropy b S T X Y =  (\<integral>(x, y). Pxy (x, y) * log b (Pxy (x, y) / Py y) \<partial>(S \<Otimes>\<^isub>M T))" 

1137 
proof  

1138 
interpret S: sigma_finite_measure S by fact 

1139 
interpret T: sigma_finite_measure T by fact 

1140 
interpret ST: pair_sigma_finite S T .. 

1141 

1142 
have "AE x in density (S \<Otimes>\<^isub>M T) (\<lambda>x. ereal (Pxy x)). Pxy x = real (RN_deriv (S \<Otimes>\<^isub>M T) (distr M (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x))) x)" 

1143 
unfolding AE_density[OF distributed_borel_measurable, OF Pxy] 

1144 
unfolding distributed_distr_eq_density[OF Pxy] 

1145 
using distributed_RN_deriv[OF Pxy] 

1146 
by auto 

1147 
moreover 

1148 
have "AE x in density (S \<Otimes>\<^isub>M T) (\<lambda>x. ereal (Pxy x)). Py (snd x) = real (RN_deriv T (distr M T Y) (snd x))" 

1149 
unfolding AE_density[OF distributed_borel_measurable, OF Pxy] 

1150 
unfolding distributed_distr_eq_density[OF Py] 

1151 
apply (rule ST.AE_pair_measure) 

1152 
apply (auto intro!: sets_Collect borel_measurable_eq measurable_compose[OF _ distributed_real_measurable[OF Py]] 

1153 
distributed_real_measurable[OF Pxy] distributed_real_AE[OF Py] 

1154 
borel_measurable_real_of_ereal measurable_compose[OF _ borel_measurable_RN_deriv_density]) 

1155 
using distributed_RN_deriv[OF Py] 

1156 
apply auto 

1157 
done 

1158 
ultimately 

1159 
have "conditional_entropy b S T X Y =  (\<integral>x. Pxy x * log b (Pxy x / Py (snd x)) \<partial>(S \<Otimes>\<^isub>M T))" 

1160 
unfolding conditional_entropy_def neg_equal_iff_equal 

1161 
apply (subst integral_density(1)[symmetric]) 

1162 
apply (auto simp: distributed_real_measurable[OF Pxy] distributed_real_AE[OF Pxy] 

1163 
measurable_compose[OF _ distributed_real_measurable[OF Py]] 

1164 
distributed_distr_eq_density[OF Pxy] 

1165 
intro!: integral_cong_AE) 

1166 
done 

1167 
then show ?thesis by (simp add: split_beta') 

1168 
qed 

1169 

1170 
lemma (in information_space) conditional_entropy_eq_entropy: 

47694  1171 
fixes Px :: "'b \<Rightarrow> real" and Py :: "'c \<Rightarrow> real" 
1172 
assumes S: "sigma_finite_measure S" and T: "sigma_finite_measure T" 

1173 
assumes Py: "distributed M T Y Py" 

1174 
assumes Pxy: "distributed M (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x)) Pxy" 

1175 
assumes I1: "integrable (S \<Otimes>\<^isub>M T) (\<lambda>x. Pxy x * log b (Pxy x))" 

1176 
assumes I2: "integrable (S \<Otimes>\<^isub>M T) (\<lambda>x. Pxy x * log b (Py (snd x)))" 

49791  1177 
shows "conditional_entropy b S T X Y = entropy b (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x))  entropy b T Y" 
40859  1178 
proof  
47694  1179 
interpret S: sigma_finite_measure S by fact 
1180 
interpret T: sigma_finite_measure T by fact 

1181 
interpret ST: pair_sigma_finite S T .. 

1182 

1183 
have "entropy b T Y =  (\<integral>y. Py y * log b (Py y) \<partial>T)" 

49786  1184 
by (rule entropy_distr[OF Py]) 
47694  1185 
also have "\<dots> =  (\<integral>(x,y). Pxy (x,y) * log b (Py y) \<partial>(S \<Otimes>\<^isub>M T))" 
1186 
using b_gt_1 Py[THEN distributed_real_measurable] 

1187 
by (subst distributed_transform_integral[OF Pxy Py, where T=snd]) (auto intro!: integral_cong) 

1188 
finally have e_eq: "entropy b T Y =  (\<integral>(x,y). Pxy (x,y) * log b (Py y) \<partial>(S \<Otimes>\<^isub>M T))" . 

49791  1189 

49790
6b9b9ebba47d
remove unneeded assumption from conditional_entropy_generic_eq
hoelzl
parents:
49788
diff
changeset

1190 
have ae2: "AE x in S \<Otimes>\<^isub>M T. Py (snd x) = 0 \<longrightarrow> Pxy x = 0" 
47694  1191 
by (intro subdensity_real[of snd, OF _ Pxy Py]) (auto intro: measurable_Pair) 
49788
3c10763f5cb4
show and use distributed_swap and distributed_jointI
hoelzl
parents:
49787
diff
changeset

1192 
moreover have ae4: "AE x in S \<Otimes>\<^isub>M T. 0 \<le> Py (snd x)" 
47694  1193 
using Py by (intro ST.AE_pair_measure) (auto simp: comp_def intro!: measurable_snd'' dest: distributed_real_AE distributed_real_measurable) 
49788
3c10763f5cb4
show and use distributed_swap and distributed_jointI
hoelzl
parents:
49787
diff
changeset

1194 
moreover note ae5 = Pxy[THEN distributed_real_AE] 
49791  1195 
ultimately have "AE x in S \<Otimes>\<^isub>M T. 0 \<le> Pxy x \<and> 0 \<le> Py (snd x) \<and> 
49790
6b9b9ebba47d
remove unneeded assumption from conditional_entropy_generic_eq
hoelzl
parents:
49788
diff
changeset

1196 
(Pxy x = 0 \<or> (Pxy x \<noteq> 0 \<longrightarrow> 0 < Pxy x \<and> 0 < Py (snd x)))" 
47694  1197 
by eventually_elim auto 
49791  1198 
then have ae: "AE x in S \<Otimes>\<^isub>M T. 
47694  1199 
Pxy x * log b (Pxy x)  Pxy x * log b (Py (snd x)) = Pxy x * log b (Pxy x / Py (snd x))" 
1200 
by eventually_elim (auto simp: log_simps mult_pos_pos field_simps b_gt_1) 

49791  1201 
have "conditional_entropy b S T X Y = 
1202 
 (\<integral>x. Pxy x * log b (Pxy x)  Pxy x * log b (Py (snd x)) \<partial>(S \<Otimes>\<^isub>M T))" 

1203 
unfolding conditional_entropy_generic_eq[OF S T Py Pxy] neg_equal_iff_equal 

1204 
apply (intro integral_cong_AE) 

1205 
using ae 

1206 
apply eventually_elim 

1207 
apply auto 

47694  1208 
done 
49791  1209 
also have "\<dots> =  (\<integral>x. Pxy x * log b (Pxy x) \<partial>(S \<Otimes>\<^isub>M T))   (\<integral>x. Pxy x * log b (Py (snd x)) \<partial>(S \<Otimes>\<^isub>M T))" 
1210 
by (simp add: integral_diff[OF I1 I2]) 

1211 
finally show ?thesis 

1212 
unfolding conditional_entropy_generic_eq[OF S T Py Pxy] entropy_distr[OF Pxy] e_eq 

1213 
by (simp add: split_beta') 

1214 
qed 

1215 

1216 
lemma (in information_space) conditional_entropy_eq_entropy_simple: 

1217 
assumes X: "simple_function M X" and Y: "simple_function M Y" 

1218 
shows "\<H>(X  Y) = entropy b (count_space (X`space M) \<Otimes>\<^isub>M count_space (Y`space M)) (\<lambda>x. (X x, Y x))  \<H>(Y)" 

1219 
proof  

1220 
have "count_space (X ` space M) \<Otimes>\<^isub>M count_space (Y ` space M) = count_space (X`space M \<times> Y`space M)" 

1221 
(is "?P = ?C") using X Y by (simp add: simple_functionD pair_measure_count_space) 

1222 
show ?thesis 

1223 
by (rule conditional_entropy_eq_entropy sigma_finite_measure_count_space_finite 

1224 
simple_functionD X Y simple_distributed simple_distributedI[OF _ refl] 

1225 
simple_distributed_joint simple_function_Pair integrable_count_space)+ 

e0854abfb3fc
alternative definition of conditional entropy
hoe 