author  hoelzl 
Wed, 10 Oct 2012 12:12:24 +0200  
changeset 49785  0a8adca22974 
parent 49776  199d1d5bb17e 
child 49786  f33d5f009627 
permissions  rwrr 
42067  1 
(* Title: HOL/Probability/Information.thy 
2 
Author: Johannes Hölzl, TU München 

3 
Author: Armin Heller, TU München 

4 
*) 

5 

6 
header {*Information theory*} 

7 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

8 
theory Information 
41413
64cd30d6b0b8
explicit file specifications  avoid secondary load path;
wenzelm
parents:
41095
diff
changeset

9 
imports 
43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

10 
Independent_Family 
43556
0d78c8d31d0d
move conditional expectation to its own theory file
hoelzl
parents:
43340
diff
changeset

11 
Radon_Nikodym 
41413
64cd30d6b0b8
explicit file specifications  avoid secondary load path;
wenzelm
parents:
41095
diff
changeset

12 
"~~/src/HOL/Library/Convex" 
36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

13 
begin 
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

14 

39097  15 
lemma log_le: "1 < a \<Longrightarrow> 0 < x \<Longrightarrow> x \<le> y \<Longrightarrow> log a x \<le> log a y" 
16 
by (subst log_le_cancel_iff) auto 

17 

18 
lemma log_less: "1 < a \<Longrightarrow> 0 < x \<Longrightarrow> x < y \<Longrightarrow> log a x < log a y" 

19 
by (subst log_less_cancel_iff) auto 

20 

21 
lemma setsum_cartesian_product': 

22 
"(\<Sum>x\<in>A \<times> B. f x) = (\<Sum>x\<in>A. setsum (\<lambda>y. f (x, y)) B)" 

23 
unfolding setsum_cartesian_product by simp 

24 

36624  25 
section "Convex theory" 
36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

26 

36624  27 
lemma log_setsum: 
28 
assumes "finite s" "s \<noteq> {}" 

29 
assumes "b > 1" 

30 
assumes "(\<Sum> i \<in> s. a i) = 1" 

31 
assumes "\<And> i. i \<in> s \<Longrightarrow> a i \<ge> 0" 

32 
assumes "\<And> i. i \<in> s \<Longrightarrow> y i \<in> {0 <..}" 

33 
shows "log b (\<Sum> i \<in> s. a i * y i) \<ge> (\<Sum> i \<in> s. a i * log b (y i))" 

34 
proof  

35 
have "convex_on {0 <..} (\<lambda> x.  log b x)" 

36 
by (rule minus_log_convex[OF `b > 1`]) 

37 
hence " log b (\<Sum> i \<in> s. a i * y i) \<le> (\<Sum> i \<in> s. a i *  log b (y i))" 

44890
22f665a2e91c
new fastforce replacing fastsimp  less confusing name
nipkow
parents:
43920
diff
changeset

38 
using convex_on_setsum[of _ _ "\<lambda> x.  log b x"] assms pos_is_convex by fastforce 
36624  39 
thus ?thesis by (auto simp add:setsum_negf le_imp_neg_le) 
40 
qed 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

41 

36624  42 
lemma log_setsum': 
43 
assumes "finite s" "s \<noteq> {}" 

44 
assumes "b > 1" 

45 
assumes "(\<Sum> i \<in> s. a i) = 1" 

46 
assumes pos: "\<And> i. i \<in> s \<Longrightarrow> 0 \<le> a i" 

47 
"\<And> i. \<lbrakk> i \<in> s ; 0 < a i \<rbrakk> \<Longrightarrow> 0 < y i" 

48 
shows "log b (\<Sum> i \<in> s. a i * y i) \<ge> (\<Sum> i \<in> s. a i * log b (y i))" 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

49 
proof  
36624  50 
have "\<And>y. (\<Sum> i \<in> s  {i. a i = 0}. a i * y i) = (\<Sum> i \<in> s. a i * y i)" 
51 
using assms by (auto intro!: setsum_mono_zero_cong_left) 

52 
moreover have "log b (\<Sum> i \<in> s  {i. a i = 0}. a i * y i) \<ge> (\<Sum> i \<in> s  {i. a i = 0}. a i * log b (y i))" 

53 
proof (rule log_setsum) 

54 
have "setsum a (s  {i. a i = 0}) = setsum a s" 

55 
using assms(1) by (rule setsum_mono_zero_cong_left) auto 

56 
thus sum_1: "setsum a (s  {i. a i = 0}) = 1" 

57 
"finite (s  {i. a i = 0})" using assms by simp_all 

58 

59 
show "s  {i. a i = 0} \<noteq> {}" 

60 
proof 

61 
assume *: "s  {i. a i = 0} = {}" 

62 
hence "setsum a (s  {i. a i = 0}) = 0" by (simp add: * setsum_empty) 

63 
with sum_1 show False by simp 

38656  64 
qed 
36624  65 

66 
fix i assume "i \<in> s  {i. a i = 0}" 

67 
hence "i \<in> s" "a i \<noteq> 0" by simp_all 

68 
thus "0 \<le> a i" "y i \<in> {0<..}" using pos[of i] by auto 

69 
qed fact+ 

70 
ultimately show ?thesis by simp 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

71 
qed 
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

72 

36624  73 
lemma log_setsum_divide: 
74 
assumes "finite S" and "S \<noteq> {}" and "1 < b" 

75 
assumes "(\<Sum>x\<in>S. g x) = 1" 

76 
assumes pos: "\<And>x. x \<in> S \<Longrightarrow> g x \<ge> 0" "\<And>x. x \<in> S \<Longrightarrow> f x \<ge> 0" 

77 
assumes g_pos: "\<And>x. \<lbrakk> x \<in> S ; 0 < g x \<rbrakk> \<Longrightarrow> 0 < f x" 

78 
shows " (\<Sum>x\<in>S. g x * log b (g x / f x)) \<le> log b (\<Sum>x\<in>S. f x)" 

79 
proof  

80 
have log_mono: "\<And>x y. 0 < x \<Longrightarrow> x \<le> y \<Longrightarrow> log b x \<le> log b y" 

81 
using `1 < b` by (subst log_le_cancel_iff) auto 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

82 

36624  83 
have " (\<Sum>x\<in>S. g x * log b (g x / f x)) = (\<Sum>x\<in>S. g x * log b (f x / g x))" 
84 
proof (unfold setsum_negf[symmetric], rule setsum_cong) 

85 
fix x assume x: "x \<in> S" 

86 
show " (g x * log b (g x / f x)) = g x * log b (f x / g x)" 

87 
proof (cases "g x = 0") 

88 
case False 

89 
with pos[OF x] g_pos[OF x] have "0 < f x" "0 < g x" by simp_all 

90 
thus ?thesis using `1 < b` by (simp add: log_divide field_simps) 

91 
qed simp 

92 
qed rule 

93 
also have "... \<le> log b (\<Sum>x\<in>S. g x * (f x / g x))" 

94 
proof (rule log_setsum') 

95 
fix x assume x: "x \<in> S" "0 < g x" 

96 
with g_pos[OF x] show "0 < f x / g x" by (safe intro!: divide_pos_pos) 

97 
qed fact+ 

98 
also have "... = log b (\<Sum>x\<in>S  {x. g x = 0}. f x)" using `finite S` 

99 
by (auto intro!: setsum_mono_zero_cong_right arg_cong[where f="log b"] 

100 
split: split_if_asm) 

101 
also have "... \<le> log b (\<Sum>x\<in>S. f x)" 

102 
proof (rule log_mono) 

103 
have "0 = (\<Sum>x\<in>S  {x. g x = 0}. 0)" by simp 

104 
also have "... < (\<Sum>x\<in>S  {x. g x = 0}. f x)" (is "_ < ?sum") 

105 
proof (rule setsum_strict_mono) 

106 
show "finite (S  {x. g x = 0})" using `finite S` by simp 

107 
show "S  {x. g x = 0} \<noteq> {}" 

108 
proof 

109 
assume "S  {x. g x = 0} = {}" 

110 
hence "(\<Sum>x\<in>S. g x) = 0" by (subst setsum_0') auto 

111 
with `(\<Sum>x\<in>S. g x) = 1` show False by simp 

112 
qed 

113 
fix x assume "x \<in> S  {x. g x = 0}" 

114 
thus "0 < f x" using g_pos[of x] pos(1)[of x] by auto 

115 
qed 

116 
finally show "0 < ?sum" . 

117 
show "(\<Sum>x\<in>S  {x. g x = 0}. f x) \<le> (\<Sum>x\<in>S. f x)" 

118 
using `finite S` pos by (auto intro!: setsum_mono2) 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

119 
qed 
36624  120 
finally show ?thesis . 
36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

121 
qed 
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

122 

39097  123 
lemma split_pairs: 
40859  124 
"((A, B) = X) \<longleftrightarrow> (fst X = A \<and> snd X = B)" and 
125 
"(X = (A, B)) \<longleftrightarrow> (fst X = A \<and> snd X = B)" by auto 

38656  126 

127 
section "Information theory" 

128 

40859  129 
locale information_space = prob_space + 
38656  130 
fixes b :: real assumes b_gt_1: "1 < b" 
131 

40859  132 
context information_space 
38656  133 
begin 
134 

40859  135 
text {* Introduce some simplification rules for logarithm of base @{term b}. *} 
136 

137 
lemma log_neg_const: 

138 
assumes "x \<le> 0" 

139 
shows "log b x = log b 0" 

36624  140 
proof  
40859  141 
{ fix u :: real 
142 
have "x \<le> 0" by fact 

143 
also have "0 < exp u" 

144 
using exp_gt_zero . 

145 
finally have "exp u \<noteq> x" 

146 
by auto } 

147 
then show "log b x = log b 0" 

148 
by (simp add: log_def ln_def) 

38656  149 
qed 
150 

40859  151 
lemma log_mult_eq: 
152 
"log b (A * B) = (if 0 < A * B then log b \<bar>A\<bar> + log b \<bar>B\<bar> else log b 0)" 

153 
using log_mult[of b "\<bar>A\<bar>" "\<bar>B\<bar>"] b_gt_1 log_neg_const[of "A * B"] 

154 
by (auto simp: zero_less_mult_iff mult_le_0_iff) 

38656  155 

40859  156 
lemma log_inverse_eq: 
157 
"log b (inverse B) = (if 0 < B then  log b B else log b 0)" 

158 
using log_inverse[of b B] log_neg_const[of "inverse B"] b_gt_1 by simp 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

159 

40859  160 
lemma log_divide_eq: 
161 
"log b (A / B) = (if 0 < A * B then log b \<bar>A\<bar>  log b \<bar>B\<bar> else log b 0)" 

162 
unfolding divide_inverse log_mult_eq log_inverse_eq abs_inverse 

163 
by (auto simp: zero_less_mult_iff mult_le_0_iff) 

38656  164 

40859  165 
lemmas log_simps = log_mult_eq log_inverse_eq log_divide_eq 
38656  166 

167 
end 

168 

39097  169 
subsection "Kullback$$Leibler divergence" 
36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

170 

39097  171 
text {* The Kullback$$Leibler divergence is also known as relative entropy or 
172 
Kullback$$Leibler distance. *} 

173 

174 
definition 

47694  175 
"entropy_density b M N = log b \<circ> real \<circ> RN_deriv M N" 
43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

176 

60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

177 
definition 
47694  178 
"KL_divergence b M N = integral\<^isup>L N (entropy_density b M N)" 
43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

179 

60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

180 
lemma (in information_space) measurable_entropy_density: 
47694  181 
assumes ac: "absolutely_continuous M N" "sets N = events" 
182 
shows "entropy_density b M N \<in> borel_measurable M" 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

183 
proof  
47694  184 
from borel_measurable_RN_deriv[OF ac] b_gt_1 show ?thesis 
43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

185 
unfolding entropy_density_def 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

186 
by (intro measurable_comp) auto 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

187 
qed 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

188 

47694  189 
lemma (in sigma_finite_measure) KL_density: 
190 
fixes f :: "'a \<Rightarrow> real" 

191 
assumes "1 < b" 

192 
assumes f: "f \<in> borel_measurable M" "AE x in M. 0 \<le> f x" 

193 
shows "KL_divergence b M (density M f) = (\<integral>x. f x * log b (f x) \<partial>M)" 

194 
unfolding KL_divergence_def 

195 
proof (subst integral_density) 

196 
show "entropy_density b M (density M (\<lambda>x. ereal (f x))) \<in> borel_measurable M" 

49776  197 
using f 
47694  198 
by (auto simp: comp_def entropy_density_def intro!: borel_measurable_log borel_measurable_RN_deriv_density) 
199 
have "density M (RN_deriv M (density M f)) = density M f" 

200 
using f by (intro density_RN_deriv_density) auto 

201 
then have eq: "AE x in M. RN_deriv M (density M f) x = f x" 

202 
using f 

203 
by (intro density_unique) 

204 
(auto intro!: borel_measurable_log borel_measurable_RN_deriv_density simp: RN_deriv_density_nonneg) 

205 
show "(\<integral>x. f x * entropy_density b M (density M (\<lambda>x. ereal (f x))) x \<partial>M) = (\<integral>x. f x * log b (f x) \<partial>M)" 

206 
apply (intro integral_cong_AE) 

207 
using eq 

208 
apply eventually_elim 

209 
apply (auto simp: entropy_density_def) 

210 
done 

211 
qed fact+ 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

212 

47694  213 
lemma (in sigma_finite_measure) KL_density_density: 
214 
fixes f g :: "'a \<Rightarrow> real" 

215 
assumes "1 < b" 

216 
assumes f: "f \<in> borel_measurable M" "AE x in M. 0 \<le> f x" 

217 
assumes g: "g \<in> borel_measurable M" "AE x in M. 0 \<le> g x" 

218 
assumes ac: "AE x in M. f x = 0 \<longrightarrow> g x = 0" 

219 
shows "KL_divergence b (density M f) (density M g) = (\<integral>x. g x * log b (g x / f x) \<partial>M)" 

220 
proof  

221 
interpret Mf: sigma_finite_measure "density M f" 

222 
using f by (subst sigma_finite_iff_density_finite) auto 

223 
have "KL_divergence b (density M f) (density M g) = 

224 
KL_divergence b (density M f) (density (density M f) (\<lambda>x. g x / f x))" 

225 
using f g ac by (subst density_density_divide) simp_all 

226 
also have "\<dots> = (\<integral>x. (g x / f x) * log b (g x / f x) \<partial>density M f)" 

227 
using f g `1 < b` by (intro Mf.KL_density) (auto simp: AE_density divide_nonneg_nonneg) 

228 
also have "\<dots> = (\<integral>x. g x * log b (g x / f x) \<partial>M)" 

229 
using ac f g `1 < b` by (subst integral_density) (auto intro!: integral_cong_AE) 

230 
finally show ?thesis . 

231 
qed 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

232 

47694  233 
lemma (in information_space) KL_gt_0: 
234 
fixes D :: "'a \<Rightarrow> real" 

235 
assumes "prob_space (density M D)" 

236 
assumes D: "D \<in> borel_measurable M" "AE x in M. 0 \<le> D x" 

237 
assumes int: "integrable M (\<lambda>x. D x * log b (D x))" 

238 
assumes A: "density M D \<noteq> M" 

239 
shows "0 < KL_divergence b M (density M D)" 

240 
proof  

241 
interpret N: prob_space "density M D" by fact 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

242 

47694  243 
obtain A where "A \<in> sets M" "emeasure (density M D) A \<noteq> emeasure M A" 
244 
using measure_eqI[of "density M D" M] `density M D \<noteq> M` by auto 

245 

246 
let ?D_set = "{x\<in>space M. D x \<noteq> 0}" 

247 
have [simp, intro]: "?D_set \<in> sets M" 

248 
using D by auto 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

249 

43920  250 
have D_neg: "(\<integral>\<^isup>+ x. ereal ( D x) \<partial>M) = 0" 
43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

251 
using D by (subst positive_integral_0_iff_AE) auto 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

252 

47694  253 
have "(\<integral>\<^isup>+ x. ereal (D x) \<partial>M) = emeasure (density M D) (space M)" 
254 
using D by (simp add: emeasure_density cong: positive_integral_cong) 

43920  255 
then have D_pos: "(\<integral>\<^isup>+ x. ereal (D x) \<partial>M) = 1" 
47694  256 
using N.emeasure_space_1 by simp 
43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

257 

47694  258 
have "integrable M D" "integral\<^isup>L M D = 1" 
259 
using D D_pos D_neg unfolding integrable_def lebesgue_integral_def by simp_all 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

260 

47694  261 
have "0 \<le> 1  measure M ?D_set" 
43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

262 
using prob_le_1 by (auto simp: field_simps) 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

263 
also have "\<dots> = (\<integral> x. D x  indicator ?D_set x \<partial>M)" 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

264 
using `integrable M D` `integral\<^isup>L M D = 1` 
47694  265 
by (simp add: emeasure_eq_measure) 
266 
also have "\<dots> < (\<integral> x. D x * (ln b * log b (D x)) \<partial>M)" 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

267 
proof (rule integral_less_AE) 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

268 
show "integrable M (\<lambda>x. D x  indicator ?D_set x)" 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

269 
using `integrable M D` 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

270 
by (intro integral_diff integral_indicator) auto 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

271 
next 
47694  272 
from integral_cmult(1)[OF int, of "ln b"] 
273 
show "integrable M (\<lambda>x. D x * (ln b * log b (D x)))" 

274 
by (simp add: ac_simps) 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

275 
next 
47694  276 
show "emeasure M {x\<in>space M. D x \<noteq> 1 \<and> D x \<noteq> 0} \<noteq> 0" 
43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

277 
proof 
47694  278 
assume eq_0: "emeasure M {x\<in>space M. D x \<noteq> 1 \<and> D x \<noteq> 0} = 0" 
279 
then have disj: "AE x in M. D x = 1 \<or> D x = 0" 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

280 
using D(1) by (auto intro!: AE_I[OF subset_refl] sets_Collect) 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

281 

47694  282 
have "emeasure M {x\<in>space M. D x = 1} = (\<integral>\<^isup>+ x. indicator {x\<in>space M. D x = 1} x \<partial>M)" 
43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

283 
using D(1) by auto 
47694  284 
also have "\<dots> = (\<integral>\<^isup>+ x. ereal (D x) \<partial>M)" 
43920  285 
using disj by (auto intro!: positive_integral_cong_AE simp: indicator_def one_ereal_def) 
47694  286 
finally have "AE x in M. D x = 1" 
287 
using D D_pos by (intro AE_I_eq_1) auto 

43920  288 
then have "(\<integral>\<^isup>+x. indicator A x\<partial>M) = (\<integral>\<^isup>+x. ereal (D x) * indicator A x\<partial>M)" 
289 
by (intro positive_integral_cong_AE) (auto simp: one_ereal_def[symmetric]) 

47694  290 
also have "\<dots> = density M D A" 
291 
using `A \<in> sets M` D by (simp add: emeasure_density) 

292 
finally show False using `A \<in> sets M` `emeasure (density M D) A \<noteq> emeasure M A` by simp 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

293 
qed 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

294 
show "{x\<in>space M. D x \<noteq> 1 \<and> D x \<noteq> 0} \<in> sets M" 
47694  295 
using D(1) by (auto intro: sets_Collect_conj) 
43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

296 

47694  297 
show "AE t in M. t \<in> {x\<in>space M. D x \<noteq> 1 \<and> D x \<noteq> 0} \<longrightarrow> 
298 
D t  indicator ?D_set t \<noteq> D t * (ln b * log b (D t))" 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

299 
using D(2) 
47694  300 
proof (eventually_elim, safe) 
301 
fix t assume Dt: "t \<in> space M" "D t \<noteq> 1" "D t \<noteq> 0" "0 \<le> D t" 

302 
and eq: "D t  indicator ?D_set t = D t * (ln b * log b (D t))" 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

303 

60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

304 
have "D t  1 = D t  indicator ?D_set t" 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

305 
using Dt by simp 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

306 
also note eq 
47694  307 
also have "D t * (ln b * log b (D t)) =  D t * ln (1 / D t)" 
308 
using b_gt_1 `D t \<noteq> 0` `0 \<le> D t` 

309 
by (simp add: log_def ln_div less_le) 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

310 
finally have "ln (1 / D t) = 1 / D t  1" 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

311 
using `D t \<noteq> 0` by (auto simp: field_simps) 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

312 
from ln_eq_minus_one[OF _ this] `D t \<noteq> 0` `0 \<le> D t` `D t \<noteq> 1` 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

313 
show False by auto 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

314 
qed 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

315 

47694  316 
show "AE t in M. D t  indicator ?D_set t \<le> D t * (ln b * log b (D t))" 
317 
using D(2) AE_space 

318 
proof eventually_elim 

319 
fix t assume "t \<in> space M" "0 \<le> D t" 

320 
show "D t  indicator ?D_set t \<le> D t * (ln b * log b (D t))" 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

321 
proof cases 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

322 
assume asm: "D t \<noteq> 0" 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

323 
then have "0 < D t" using `0 \<le> D t` by auto 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

324 
then have "0 < 1 / D t" by auto 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

325 
have "D t  indicator ?D_set t \<le>  D t * (1 / D t  1)" 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

326 
using asm `t \<in> space M` by (simp add: field_simps) 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

327 
also have " D t * (1 / D t  1) \<le>  D t * ln (1 / D t)" 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

328 
using ln_le_minus_one `0 < 1 / D t` by (intro mult_left_mono_neg) auto 
47694  329 
also have "\<dots> = D t * (ln b * log b (D t))" 
330 
using `0 < D t` b_gt_1 

331 
by (simp_all add: log_def ln_div) 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

332 
finally show ?thesis by simp 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

333 
qed simp 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

334 
qed 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

335 
qed 
47694  336 
also have "\<dots> = (\<integral> x. ln b * (D x * log b (D x)) \<partial>M)" 
337 
by (simp add: ac_simps) 

338 
also have "\<dots> = ln b * (\<integral> x. D x * log b (D x) \<partial>M)" 

339 
using int by (rule integral_cmult) 

340 
finally show ?thesis 

341 
using b_gt_1 D by (subst KL_density) (auto simp: zero_less_mult_iff) 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

342 
qed 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

343 

47694  344 
lemma (in sigma_finite_measure) KL_same_eq_0: "KL_divergence b M M = 0" 
43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

345 
proof  
47694  346 
have "AE x in M. 1 = RN_deriv M M x" 
43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

347 
proof (rule RN_deriv_unique) 
47694  348 
show "(\<lambda>x. 1) \<in> borel_measurable M" "AE x in M. 0 \<le> (1 :: ereal)" by auto 
349 
show "density M (\<lambda>x. 1) = M" 

350 
apply (auto intro!: measure_eqI emeasure_density) 

351 
apply (subst emeasure_density) 

352 
apply auto 

353 
done 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

354 
qed 
47694  355 
then have "AE x in M. log b (real (RN_deriv M M x)) = 0" 
43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

356 
by (elim AE_mp) simp 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

357 
from integral_cong_AE[OF this] 
47694  358 
have "integral\<^isup>L M (entropy_density b M M) = 0" 
43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

359 
by (simp add: entropy_density_def comp_def) 
47694  360 
then show "KL_divergence b M M = 0" 
43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

361 
unfolding KL_divergence_def 
47694  362 
by auto 
43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

363 
qed 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

364 

47694  365 
lemma (in information_space) KL_eq_0_iff_eq: 
366 
fixes D :: "'a \<Rightarrow> real" 

367 
assumes "prob_space (density M D)" 

368 
assumes D: "D \<in> borel_measurable M" "AE x in M. 0 \<le> D x" 

369 
assumes int: "integrable M (\<lambda>x. D x * log b (D x))" 

370 
shows "KL_divergence b M (density M D) = 0 \<longleftrightarrow> density M D = M" 

371 
using KL_same_eq_0[of b] KL_gt_0[OF assms] 

372 
by (auto simp: less_le) 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

373 

47694  374 
lemma (in information_space) KL_eq_0_iff_eq_ac: 
375 
fixes D :: "'a \<Rightarrow> real" 

376 
assumes "prob_space N" 

377 
assumes ac: "absolutely_continuous M N" "sets N = sets M" 

378 
assumes int: "integrable N (entropy_density b M N)" 

379 
shows "KL_divergence b M N = 0 \<longleftrightarrow> N = M" 

41833
563bea92b2c0
add lemma KL_divergence_vimage, mutual_information_generic
hoelzl
parents:
41689
diff
changeset

380 
proof  
47694  381 
interpret N: prob_space N by fact 
382 
have "finite_measure N" by unfold_locales 

383 
from real_RN_deriv[OF this ac] guess D . note D = this 

384 

385 
have "N = density M (RN_deriv M N)" 

386 
using ac by (rule density_RN_deriv[symmetric]) 

387 
also have "\<dots> = density M D" 

388 
using borel_measurable_RN_deriv[OF ac] D by (auto intro!: density_cong) 

389 
finally have N: "N = density M D" . 

41833
563bea92b2c0
add lemma KL_divergence_vimage, mutual_information_generic
hoelzl
parents:
41689
diff
changeset

390 

47694  391 
from absolutely_continuous_AE[OF ac(2,1) D(2)] D b_gt_1 ac measurable_entropy_density 
392 
have "integrable N (\<lambda>x. log b (D x))" 

393 
by (intro integrable_cong_AE[THEN iffD2, OF _ _ _ int]) 

394 
(auto simp: N entropy_density_def) 

395 
with D b_gt_1 have "integrable M (\<lambda>x. D x * log b (D x))" 

396 
by (subst integral_density(2)[symmetric]) (auto simp: N[symmetric] comp_def) 

397 
with `prob_space N` D show ?thesis 

398 
unfolding N 

399 
by (intro KL_eq_0_iff_eq) auto 

41833
563bea92b2c0
add lemma KL_divergence_vimage, mutual_information_generic
hoelzl
parents:
41689
diff
changeset

400 
qed 
563bea92b2c0
add lemma KL_divergence_vimage, mutual_information_generic
hoelzl
parents:
41689
diff
changeset

401 

47694  402 
lemma (in information_space) KL_nonneg: 
403 
assumes "prob_space (density M D)" 

404 
assumes D: "D \<in> borel_measurable M" "AE x in M. 0 \<le> D x" 

405 
assumes int: "integrable M (\<lambda>x. D x * log b (D x))" 

406 
shows "0 \<le> KL_divergence b M (density M D)" 

407 
using KL_gt_0[OF assms] by (cases "density M D = M") (auto simp: KL_same_eq_0) 

40859  408 

47694  409 
lemma (in sigma_finite_measure) KL_density_density_nonneg: 
410 
fixes f g :: "'a \<Rightarrow> real" 

411 
assumes "1 < b" 

412 
assumes f: "f \<in> borel_measurable M" "AE x in M. 0 \<le> f x" "prob_space (density M f)" 

413 
assumes g: "g \<in> borel_measurable M" "AE x in M. 0 \<le> g x" "prob_space (density M g)" 

414 
assumes ac: "AE x in M. f x = 0 \<longrightarrow> g x = 0" 

415 
assumes int: "integrable M (\<lambda>x. g x * log b (g x / f x))" 

416 
shows "0 \<le> KL_divergence b (density M f) (density M g)" 

417 
proof  

418 
interpret Mf: prob_space "density M f" by fact 

419 
interpret Mf: information_space "density M f" b by default fact 

420 
have eq: "density (density M f) (\<lambda>x. g x / f x) = density M g" (is "?DD = _") 

421 
using f g ac by (subst density_density_divide) simp_all 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

422 

47694  423 
have "0 \<le> KL_divergence b (density M f) (density (density M f) (\<lambda>x. g x / f x))" 
424 
proof (rule Mf.KL_nonneg) 

425 
show "prob_space ?DD" unfolding eq by fact 

426 
from f g show "(\<lambda>x. g x / f x) \<in> borel_measurable (density M f)" 

427 
by auto 

428 
show "AE x in density M f. 0 \<le> g x / f x" 

429 
using f g by (auto simp: AE_density divide_nonneg_nonneg) 

430 
show "integrable (density M f) (\<lambda>x. g x / f x * log b (g x / f x))" 

431 
using `1 < b` f g ac 

432 
by (subst integral_density) 

433 
(auto intro!: integrable_cong_AE[THEN iffD2, OF _ _ _ int] measurable_If) 

434 
qed 

435 
also have "\<dots> = KL_divergence b (density M f) (density M g)" 

436 
using f g ac by (subst density_density_divide) simp_all 

437 
finally show ?thesis . 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

438 
qed 
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

439 

39097  440 
subsection {* Mutual Information *} 
441 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

442 
definition (in prob_space) 
38656  443 
"mutual_information b S T X Y = 
47694  444 
KL_divergence b (distr M S X \<Otimes>\<^isub>M distr M T Y) (distr M (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x)))" 
36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

445 

47694  446 
lemma (in information_space) mutual_information_indep_vars: 
43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

447 
fixes S T X Y 
47694  448 
defines "P \<equiv> distr M S X \<Otimes>\<^isub>M distr M T Y" 
449 
defines "Q \<equiv> distr M (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x))" 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

450 
shows "indep_var S X T Y \<longleftrightarrow> 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

451 
(random_variable S X \<and> random_variable T Y \<and> 
47694  452 
absolutely_continuous P Q \<and> integrable Q (entropy_density b P Q) \<and> 
453 
mutual_information b S T X Y = 0)" 

454 
unfolding indep_var_distribution_eq 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

455 
proof safe 
47694  456 
assume rv: "random_variable S X" "random_variable T Y" 
43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

457 

47694  458 
interpret X: prob_space "distr M S X" 
459 
by (rule prob_space_distr) fact 

460 
interpret Y: prob_space "distr M T Y" 

461 
by (rule prob_space_distr) fact 

462 
interpret XY: pair_prob_space "distr M S X" "distr M T Y" by default 

463 
interpret P: information_space P b unfolding P_def by default (rule b_gt_1) 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

464 

47694  465 
interpret Q: prob_space Q unfolding Q_def 
466 
by (rule prob_space_distr) (simp add: comp_def measurable_pair_iff rv) 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

467 

47694  468 
{ assume "distr M S X \<Otimes>\<^isub>M distr M T Y = distr M (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x))" 
469 
then have [simp]: "Q = P" unfolding Q_def P_def by simp 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

470 

47694  471 
show ac: "absolutely_continuous P Q" by (simp add: absolutely_continuous_def) 
472 
then have ed: "entropy_density b P Q \<in> borel_measurable P" 

473 
by (rule P.measurable_entropy_density) simp 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

474 

47694  475 
have "AE x in P. 1 = RN_deriv P Q x" 
476 
proof (rule P.RN_deriv_unique) 

477 
show "density P (\<lambda>x. 1) = Q" 

478 
unfolding `Q = P` by (intro measure_eqI) (auto simp: emeasure_density) 

479 
qed auto 

480 
then have ae_0: "AE x in P. entropy_density b P Q x = 0" 

481 
by eventually_elim (auto simp: entropy_density_def) 

482 
then have "integrable P (entropy_density b P Q) \<longleftrightarrow> integrable Q (\<lambda>x. 0)" 

483 
using ed unfolding `Q = P` by (intro integrable_cong_AE) auto 

484 
then show "integrable Q (entropy_density b P Q)" by simp 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

485 

47694  486 
show "mutual_information b S T X Y = 0" 
487 
unfolding mutual_information_def KL_divergence_def P_def[symmetric] Q_def[symmetric] `Q = P` 

488 
using ae_0 by (simp cong: integral_cong_AE) } 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

489 

47694  490 
{ assume ac: "absolutely_continuous P Q" 
491 
assume int: "integrable Q (entropy_density b P Q)" 

492 
assume I_eq_0: "mutual_information b S T X Y = 0" 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

493 

47694  494 
have eq: "Q = P" 
495 
proof (rule P.KL_eq_0_iff_eq_ac[THEN iffD1]) 

496 
show "prob_space Q" by unfold_locales 

497 
show "absolutely_continuous P Q" by fact 

498 
show "integrable Q (entropy_density b P Q)" by fact 

499 
show "sets Q = sets P" by (simp add: P_def Q_def sets_pair_measure) 

500 
show "KL_divergence b P Q = 0" 

501 
using I_eq_0 unfolding mutual_information_def by (simp add: P_def Q_def) 

502 
qed 

503 
then show "distr M S X \<Otimes>\<^isub>M distr M T Y = distr M (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x))" 

504 
unfolding P_def Q_def .. } 

43340
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

505 
qed 
60e181c4eae4
lemma: independence is equal to mutual information = 0
hoelzl
parents:
42148
diff
changeset

506 

40859  507 
abbreviation (in information_space) 
508 
mutual_information_Pow ("\<I>'(_ ; _')") where 

47694  509 
"\<I>(X ; Y) \<equiv> mutual_information b (count_space (X`space M)) (count_space (Y`space M)) X Y" 
41689
3e39b0e730d6
the measure valuation is again part of the measure_space type, instead of an explicit parameter to the locale;
hoelzl
parents:
41661
diff
changeset

510 

47694  511 
lemma (in information_space) 
512 
fixes Pxy :: "'b \<times> 'c \<Rightarrow> real" and Px :: "'b \<Rightarrow> real" and Py :: "'c \<Rightarrow> real" 

513 
assumes "sigma_finite_measure S" "sigma_finite_measure T" 

514 
assumes Px: "distributed M S X Px" and Py: "distributed M T Y Py" 

515 
assumes Pxy: "distributed M (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x)) Pxy" 

516 
defines "f \<equiv> \<lambda>x. Pxy x * log b (Pxy x / (Px (fst x) * Py (snd x)))" 

517 
shows mutual_information_distr: "mutual_information b S T X Y = integral\<^isup>L (S \<Otimes>\<^isub>M T) f" (is "?M = ?R") 

518 
and mutual_information_nonneg: "integrable (S \<Otimes>\<^isub>M T) f \<Longrightarrow> 0 \<le> mutual_information b S T X Y" 

40859  519 
proof  
47694  520 
have X: "random_variable S X" 
521 
using Px by (auto simp: distributed_def) 

522 
have Y: "random_variable T Y" 

523 
using Py by (auto simp: distributed_def) 

524 
interpret S: sigma_finite_measure S by fact 

525 
interpret T: sigma_finite_measure T by fact 

526 
interpret ST: pair_sigma_finite S T .. 

527 
interpret X: prob_space "distr M S X" using X by (rule prob_space_distr) 

528 
interpret Y: prob_space "distr M T Y" using Y by (rule prob_space_distr) 

529 
interpret XY: pair_prob_space "distr M S X" "distr M T Y" .. 

530 
let ?P = "S \<Otimes>\<^isub>M T" 

531 
let ?D = "distr M ?P (\<lambda>x. (X x, Y x))" 

532 

533 
{ fix A assume "A \<in> sets S" 

534 
with X Y have "emeasure (distr M S X) A = emeasure ?D (A \<times> space T)" 

535 
by (auto simp: emeasure_distr measurable_Pair measurable_space 

536 
intro!: arg_cong[where f="emeasure M"]) } 

537 
note marginal_eq1 = this 

538 
{ fix A assume "A \<in> sets T" 

539 
with X Y have "emeasure (distr M T Y) A = emeasure ?D (space S \<times> A)" 

540 
by (auto simp: emeasure_distr measurable_Pair measurable_space 

541 
intro!: arg_cong[where f="emeasure M"]) } 

542 
note marginal_eq2 = this 

543 

544 
have eq: "(\<lambda>x. ereal (Px (fst x) * Py (snd x))) = (\<lambda>(x, y). ereal (Px x) * ereal (Py y))" 

545 
by auto 

546 

547 
have distr_eq: "distr M S X \<Otimes>\<^isub>M distr M T Y = density ?P (\<lambda>x. ereal (Px (fst x) * Py (snd x)))" 

548 
unfolding Px(1)[THEN distributed_distr_eq_density] Py(1)[THEN distributed_distr_eq_density] eq 

549 
proof (subst pair_measure_density) 

550 
show "(\<lambda>x. ereal (Px x)) \<in> borel_measurable S" "(\<lambda>y. ereal (Py y)) \<in> borel_measurable T" 

551 
"AE x in S. 0 \<le> ereal (Px x)" "AE y in T. 0 \<le> ereal (Py y)" 

552 
using Px Py by (auto simp: distributed_def) 

553 
show "sigma_finite_measure (density S Px)" unfolding Px(1)[THEN distributed_distr_eq_density, symmetric] .. 

554 
show "sigma_finite_measure (density T Py)" unfolding Py(1)[THEN distributed_distr_eq_density, symmetric] .. 

555 
qed (fact  simp)+ 

556 

557 
have M: "?M = KL_divergence b (density ?P (\<lambda>x. ereal (Px (fst x) * Py (snd x)))) (density ?P (\<lambda>x. ereal (Pxy x)))" 

558 
unfolding mutual_information_def distr_eq Pxy(1)[THEN distributed_distr_eq_density] .. 

559 

560 
from Px Py have f: "(\<lambda>x. Px (fst x) * Py (snd x)) \<in> borel_measurable ?P" 

561 
by (intro borel_measurable_times) (auto intro: distributed_real_measurable measurable_fst'' measurable_snd'') 

562 
have PxPy_nonneg: "AE x in ?P. 0 \<le> Px (fst x) * Py (snd x)" 

563 
proof (rule ST.AE_pair_measure) 

564 
show "{x \<in> space ?P. 0 \<le> Px (fst x) * Py (snd x)} \<in> sets ?P" 

565 
using f by auto 

566 
show "AE x in S. AE y in T. 0 \<le> Px (fst (x, y)) * Py (snd (x, y))" 

567 
using Px Py by (auto simp: zero_le_mult_iff dest!: distributed_real_AE) 

568 
qed 

569 

570 
have "(AE x in ?P. Px (fst x) = 0 \<longrightarrow> Pxy x = 0)" 

571 
by (rule subdensity_real[OF measurable_fst Pxy Px]) auto 

572 
moreover 

573 
have "(AE x in ?P. Py (snd x) = 0 \<longrightarrow> Pxy x = 0)" 

574 
by (rule subdensity_real[OF measurable_snd Pxy Py]) auto 

575 
ultimately have ac: "AE x in ?P. Px (fst x) * Py (snd x) = 0 \<longrightarrow> Pxy x = 0" 

576 
by eventually_elim auto 

577 

578 
show "?M = ?R" 

579 
unfolding M f_def 

580 
using b_gt_1 f PxPy_nonneg Pxy[THEN distributed_real_measurable] Pxy[THEN distributed_real_AE] ac 

581 
by (rule ST.KL_density_density) 

582 

583 
assume int: "integrable (S \<Otimes>\<^isub>M T) f" 

584 
show "0 \<le> ?M" unfolding M 

585 
proof (rule ST.KL_density_density_nonneg 

586 
[OF b_gt_1 f PxPy_nonneg _ Pxy[THEN distributed_real_measurable] Pxy[THEN distributed_real_AE] _ ac int[unfolded f_def]]) 

587 
show "prob_space (density (S \<Otimes>\<^isub>M T) (\<lambda>x. ereal (Pxy x))) " 

588 
unfolding distributed_distr_eq_density[OF Pxy, symmetric] 

589 
using distributed_measurable[OF Pxy] by (rule prob_space_distr) 

590 
show "prob_space (density (S \<Otimes>\<^isub>M T) (\<lambda>x. ereal (Px (fst x) * Py (snd x))))" 

591 
unfolding distr_eq[symmetric] by unfold_locales 

40859  592 
qed 
593 
qed 

594 

595 
lemma (in information_space) 

47694  596 
fixes Pxy :: "'b \<times> 'c \<Rightarrow> real" and Px :: "'b \<Rightarrow> real" and Py :: "'c \<Rightarrow> real" 
597 
assumes "sigma_finite_measure S" "sigma_finite_measure T" 

598 
assumes Px: "distributed M S X Px" and Py: "distributed M T Y Py" 

599 
assumes Pxy: "distributed M (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x)) Pxy" 

600 
assumes ae: "AE x in S. AE y in T. Pxy (x, y) = Px x * Py y" 

601 
shows mutual_information_eq_0: "mutual_information b S T X Y = 0" 

36624  602 
proof  
47694  603 
interpret S: sigma_finite_measure S by fact 
604 
interpret T: sigma_finite_measure T by fact 

605 
interpret ST: pair_sigma_finite S T .. 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

606 

47694  607 
have "AE x in S \<Otimes>\<^isub>M T. Px (fst x) = 0 \<longrightarrow> Pxy x = 0" 
608 
by (rule subdensity_real[OF measurable_fst Pxy Px]) auto 

609 
moreover 

610 
have "AE x in S \<Otimes>\<^isub>M T. Py (snd x) = 0 \<longrightarrow> Pxy x = 0" 

611 
by (rule subdensity_real[OF measurable_snd Pxy Py]) auto 

612 
moreover 

613 
have "AE x in S \<Otimes>\<^isub>M T. Pxy x = Px (fst x) * Py (snd x)" 

614 
using distributed_real_measurable[OF Px] distributed_real_measurable[OF Py] distributed_real_measurable[OF Pxy] 

615 
by (intro ST.AE_pair_measure) (auto simp: ae intro!: measurable_snd'' measurable_fst'') 

616 
ultimately have "AE x in S \<Otimes>\<^isub>M T. Pxy x * log b (Pxy x / (Px (fst x) * Py (snd x))) = 0" 

617 
by eventually_elim simp 

618 
then have "(\<integral>x. Pxy x * log b (Pxy x / (Px (fst x) * Py (snd x))) \<partial>(S \<Otimes>\<^isub>M T)) = (\<integral>x. 0 \<partial>(S \<Otimes>\<^isub>M T))" 

619 
by (rule integral_cong_AE) 

620 
then show ?thesis 

621 
by (subst mutual_information_distr[OF assms(15)]) simp 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

622 
qed 
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

623 

47694  624 
lemma (in information_space) mutual_information_simple_distributed: 
625 
assumes X: "simple_distributed M X Px" and Y: "simple_distributed M Y Py" 

626 
assumes XY: "simple_distributed M (\<lambda>x. (X x, Y x)) Pxy" 

627 
shows "\<I>(X ; Y) = (\<Sum>(x, y)\<in>(\<lambda>x. (X x, Y x))`space M. Pxy (x, y) * log b (Pxy (x, y) / (Px x * Py y)))" 

628 
proof (subst mutual_information_distr[OF _ _ simple_distributed[OF X] simple_distributed[OF Y] simple_distributed_joint[OF XY]]) 

629 
note fin = simple_distributed_joint_finite[OF XY, simp] 

630 
show "sigma_finite_measure (count_space (X ` space M))" 

631 
by (simp add: sigma_finite_measure_count_space_finite) 

632 
show "sigma_finite_measure (count_space (Y ` space M))" 

633 
by (simp add: sigma_finite_measure_count_space_finite) 

634 
let ?Pxy = "\<lambda>x. (if x \<in> (\<lambda>x. (X x, Y x)) ` space M then Pxy x else 0)" 

635 
let ?f = "\<lambda>x. ?Pxy x * log b (?Pxy x / (Px (fst x) * Py (snd x)))" 

636 
have "\<And>x. ?f x = (if x \<in> (\<lambda>x. (X x, Y x)) ` space M then Pxy x * log b (Pxy x / (Px (fst x) * Py (snd x))) else 0)" 

637 
by auto 

638 
with fin show "(\<integral> x. ?f x \<partial>(count_space (X ` space M) \<Otimes>\<^isub>M count_space (Y ` space M))) = 

639 
(\<Sum>(x, y)\<in>(\<lambda>x. (X x, Y x)) ` space M. Pxy (x, y) * log b (Pxy (x, y) / (Px x * Py y)))" 

640 
by (auto simp add: pair_measure_count_space lebesgue_integral_count_space_finite setsum_cases split_beta' 

641 
intro!: setsum_cong) 

642 
qed 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

643 

47694  644 
lemma (in information_space) 
645 
fixes Pxy :: "'b \<times> 'c \<Rightarrow> real" and Px :: "'b \<Rightarrow> real" and Py :: "'c \<Rightarrow> real" 

646 
assumes Px: "simple_distributed M X Px" and Py: "simple_distributed M Y Py" 

647 
assumes Pxy: "simple_distributed M (\<lambda>x. (X x, Y x)) Pxy" 

648 
assumes ae: "\<forall>x\<in>space M. Pxy (X x, Y x) = Px (X x) * Py (Y x)" 

649 
shows mutual_information_eq_0_simple: "\<I>(X ; Y) = 0" 

650 
proof (subst mutual_information_simple_distributed[OF Px Py Pxy]) 

651 
have "(\<Sum>(x, y)\<in>(\<lambda>x. (X x, Y x)) ` space M. Pxy (x, y) * log b (Pxy (x, y) / (Px x * Py y))) = 

652 
(\<Sum>(x, y)\<in>(\<lambda>x. (X x, Y x)) ` space M. 0)" 

653 
by (intro setsum_cong) (auto simp: ae) 

654 
then show "(\<Sum>(x, y)\<in>(\<lambda>x. (X x, Y x)) ` space M. 

655 
Pxy (x, y) * log b (Pxy (x, y) / (Px x * Py y))) = 0" by simp 

656 
qed 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

657 

39097  658 
subsection {* Entropy *} 
659 

47694  660 
definition (in prob_space) entropy :: "real \<Rightarrow> 'b measure \<Rightarrow> ('a \<Rightarrow> 'b) \<Rightarrow> real" where 
661 
"entropy b S X =  KL_divergence b S (distr M S X)" 

662 

40859  663 
abbreviation (in information_space) 
664 
entropy_Pow ("\<H>'(_')") where 

47694  665 
"\<H>(X) \<equiv> entropy b (count_space (X`space M)) X" 
41981
cdf7693bbe08
reworked Probability theory: measures are not type restricted to positive extended reals
hoelzl
parents:
41833
diff
changeset

666 

47694  667 
lemma (in information_space) entropy_distr: 
668 
fixes X :: "'a \<Rightarrow> 'b" 

49785  669 
assumes X: "distributed M MX X f" 
47694  670 
shows "entropy b MX X =  (\<integral>x. f x * log b (f x) \<partial>MX)" 
49785  671 
unfolding entropy_def KL_divergence_def entropy_density_def comp_def 
672 
proof (subst integral_cong_AE) 

673 
note D = distributed_measurable[OF X] distributed_borel_measurable[OF X] distributed_AE[OF X] 

674 
interpret X: prob_space "distr M MX X" 

675 
using D(1) by (rule prob_space_distr) 

676 

677 
have sf: "sigma_finite_measure (distr M MX X)" by default 

678 
have ae: "AE x in MX. f x = RN_deriv MX (density MX f) x" 

679 
using D 

680 
by (intro RN_deriv_unique_sigma_finite) 

681 
(auto intro: divide_nonneg_nonneg measure_nonneg 

682 
simp: distributed_distr_eq_density[symmetric, OF X] sf) 

683 
show "AE x in distr M MX X. log b (real (RN_deriv MX (distr M MX X) x)) = 

684 
log b (f x)" 

685 
unfolding distributed_distr_eq_density[OF X] 

686 
apply (subst AE_density) 

687 
using D apply simp 

688 
using ae apply eventually_elim 

689 
apply (subst (asm) eq_commute) 

690 
apply auto 

691 
done 

692 
show " (\<integral> x. log b (f x) \<partial>distr M MX X) =  (\<integral> x. f x * log b (f x) \<partial>MX)" 

693 
unfolding distributed_distr_eq_density[OF X] 

694 
using D 

695 
by (subst integral_density) 

696 
(auto simp: borel_measurable_ereal_iff) 

697 
qed 

698 

699 
lemma (in prob_space) uniform_distributed_params: 

700 
assumes X: "distributed M MX X (\<lambda>x. indicator A x / measure MX A)" 

701 
shows "A \<in> sets MX" "measure MX A \<noteq> 0" 

47694  702 
proof  
49785  703 
interpret X: prob_space "distr M MX X" 
704 
using distributed_measurable[OF X] by (rule prob_space_distr) 

705 

706 
show "measure MX A \<noteq> 0" 

707 
proof 

708 
assume "measure MX A = 0" 

709 
with X.emeasure_space_1 X.prob_space distributed_distr_eq_density[OF X] 

710 
show False 

711 
by (simp add: emeasure_density zero_ereal_def[symmetric]) 

712 
qed 

713 
with measure_notin_sets[of A MX] show "A \<in> sets MX" 

714 
by blast 

39097  715 
qed 
36624  716 

47694  717 
lemma (in information_space) entropy_uniform: 
49785  718 
assumes X: "distributed M MX X (\<lambda>x. indicator A x / measure MX A)" (is "distributed _ _ _ ?f") 
47694  719 
shows "entropy b MX X = log b (measure MX A)" 
49785  720 
proof (subst entropy_distr[OF X]) 
721 
have [simp]: "emeasure MX A \<noteq> \<infinity>" 

722 
using uniform_distributed_params[OF X] by (auto simp add: measure_def) 

723 
have eq: "(\<integral> x. indicator A x / measure MX A * log b (indicator A x / measure MX A) \<partial>MX) = 

724 
(\<integral> x. ( log b (measure MX A) / measure MX A) * indicator A x \<partial>MX)" 

725 
using measure_nonneg[of MX A] uniform_distributed_params[OF X] 

726 
by (auto intro!: integral_cong split: split_indicator simp: log_divide_eq) 

727 
show " (\<integral> x. indicator A x / measure MX A * log b (indicator A x / measure MX A) \<partial>MX) = 

728 
log b (measure MX A)" 

729 
unfolding eq using uniform_distributed_params[OF X] 

730 
by (subst lebesgue_integral_cmult) (auto simp: measure_def) 

731 
qed 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

732 

47694  733 
lemma (in information_space) entropy_simple_distributed: 
734 
fixes X :: "'a \<Rightarrow> 'b" 

735 
assumes X: "simple_distributed M X f" 

736 
shows "\<H>(X) =  (\<Sum>x\<in>X`space M. f x * log b (f x))" 

49785  737 
proof (subst entropy_distr[OF simple_distributed[OF X]]) 
47694  738 
show " (\<integral>x. f x * log b (f x) \<partial>(count_space (X`space M))) =  (\<Sum>x\<in>X ` space M. f x * log b (f x))" 
739 
using X by (auto simp add: lebesgue_integral_count_space_finite) 

39097  740 
qed 
741 

40859  742 
lemma (in information_space) entropy_le_card_not_0: 
47694  743 
assumes X: "simple_distributed M X f" 
744 
shows "\<H>(X) \<le> log b (card (X ` space M \<inter> {x. f x \<noteq> 0}))" 

39097  745 
proof  
47694  746 
have "\<H>(X) = (\<Sum>x\<in>X`space M. f x * log b (1 / f x))" 
747 
unfolding entropy_simple_distributed[OF X] setsum_negf[symmetric] 

748 
using X by (auto dest: simple_distributed_nonneg intro!: setsum_cong simp: log_simps less_le) 

749 
also have "\<dots> \<le> log b (\<Sum>x\<in>X`space M. f x * (1 / f x))" 

750 
using not_empty b_gt_1 `simple_distributed M X f` 

751 
by (intro log_setsum') (auto simp: simple_distributed_nonneg simple_distributed_setsum_space) 

752 
also have "\<dots> = log b (\<Sum>x\<in>X`space M. if f x \<noteq> 0 then 1 else 0)" 

41981
cdf7693bbe08
reworked Probability theory: measures are not type restricted to positive extended reals
hoelzl
parents:
41833
diff
changeset

753 
by (intro arg_cong[where f="\<lambda>X. log b X"] setsum_cong) auto 
39097  754 
finally show ?thesis 
47694  755 
using `simple_distributed M X f` by (auto simp: setsum_cases real_eq_of_nat) 
39097  756 
qed 
757 

40859  758 
lemma (in information_space) entropy_le_card: 
47694  759 
assumes "simple_distributed M X f" 
40859  760 
shows "\<H>(X) \<le> log b (real (card (X ` space M)))" 
39097  761 
proof cases 
47694  762 
assume "X ` space M \<inter> {x. f x \<noteq> 0} = {}" 
763 
then have "\<And>x. x\<in>X`space M \<Longrightarrow> f x = 0" by auto 

39097  764 
moreover 
765 
have "0 < card (X`space M)" 

47694  766 
using `simple_distributed M X f` not_empty by (auto simp: card_gt_0_iff) 
39097  767 
then have "log b 1 \<le> log b (real (card (X`space M)))" 
768 
using b_gt_1 by (intro log_le) auto 

47694  769 
ultimately show ?thesis using assms by (simp add: entropy_simple_distributed) 
39097  770 
next 
47694  771 
assume False: "X ` space M \<inter> {x. f x \<noteq> 0} \<noteq> {}" 
772 
have "card (X ` space M \<inter> {x. f x \<noteq> 0}) \<le> card (X ` space M)" 

773 
(is "?A \<le> ?B") using assms not_empty 

774 
by (auto intro!: card_mono simp: simple_function_def simple_distributed_def) 

40859  775 
note entropy_le_card_not_0[OF assms] 
39097  776 
also have "log b (real ?A) \<le> log b (real ?B)" 
40859  777 
using b_gt_1 False not_empty `?A \<le> ?B` assms 
47694  778 
by (auto intro!: log_le simp: card_gt_0_iff simp: simple_distributed_def) 
39097  779 
finally show ?thesis . 
780 
qed 

781 

782 
subsection {* Conditional Mutual Information *} 

783 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

784 
definition (in prob_space) 
41689
3e39b0e730d6
the measure valuation is again part of the measure_space type, instead of an explicit parameter to the locale;
hoelzl
parents:
41661
diff
changeset

785 
"conditional_mutual_information b MX MY MZ X Y Z \<equiv> 
3e39b0e730d6
the measure valuation is again part of the measure_space type, instead of an explicit parameter to the locale;
hoelzl
parents:
41661
diff
changeset

786 
mutual_information b MX (MY \<Otimes>\<^isub>M MZ) X (\<lambda>x. (Y x, Z x))  
3e39b0e730d6
the measure valuation is again part of the measure_space type, instead of an explicit parameter to the locale;
hoelzl
parents:
41661
diff
changeset

787 
mutual_information b MX MZ X Z" 
36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

788 

40859  789 
abbreviation (in information_space) 
790 
conditional_mutual_information_Pow ("\<I>'( _ ; _  _ ')") where 

36624  791 
"\<I>(X ; Y  Z) \<equiv> conditional_mutual_information b 
47694  792 
(count_space (X ` space M)) (count_space (Y ` space M)) (count_space (Z ` space M)) X Y Z" 
36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

793 

40859  794 
lemma (in information_space) conditional_mutual_information_generic_eq: 
47694  795 
assumes S: "sigma_finite_measure S" and T: "sigma_finite_measure T" and P: "sigma_finite_measure P" 
796 
assumes Px: "distributed M S X Px" 

797 
assumes Pz: "distributed M P Z Pz" 

798 
assumes Pyz: "distributed M (T \<Otimes>\<^isub>M P) (\<lambda>x. (Y x, Z x)) Pyz" 

799 
assumes Pxz: "distributed M (S \<Otimes>\<^isub>M P) (\<lambda>x. (X x, Z x)) Pxz" 

800 
assumes Pxyz: "distributed M (S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P) (\<lambda>x. (X x, Y x, Z x)) Pxyz" 

801 
assumes I1: "integrable (S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P) (\<lambda>(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Px x * Pyz (y, z))))" 

802 
assumes I2: "integrable (S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P) (\<lambda>(x, y, z). Pxyz (x, y, z) * log b (Pxz (x, z) / (Px x * Pz z)))" 

803 
shows "conditional_mutual_information b S T P X Y Z 

804 
= (\<integral>(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Pxz (x, z) * (Pyz (y,z) / Pz z))) \<partial>(S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P))" 

40859  805 
proof  
47694  806 
interpret S: sigma_finite_measure S by fact 
807 
interpret T: sigma_finite_measure T by fact 

808 
interpret P: sigma_finite_measure P by fact 

809 
interpret TP: pair_sigma_finite T P .. 

810 
interpret SP: pair_sigma_finite S P .. 

811 
interpret SPT: pair_sigma_finite "S \<Otimes>\<^isub>M P" T .. 

812 
interpret STP: pair_sigma_finite S "T \<Otimes>\<^isub>M P" .. 

813 
have TP: "sigma_finite_measure (T \<Otimes>\<^isub>M P)" .. 

814 
have SP: "sigma_finite_measure (S \<Otimes>\<^isub>M P)" .. 

815 
have YZ: "random_variable (T \<Otimes>\<^isub>M P) (\<lambda>x. (Y x, Z x))" 

816 
using Pyz by (simp add: distributed_measurable) 

817 

818 
have Pxyz_f: "\<And>M f. f \<in> measurable M (S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P) \<Longrightarrow> (\<lambda>x. Pxyz (f x)) \<in> borel_measurable M" 

819 
using measurable_comp[OF _ Pxyz[THEN distributed_real_measurable]] by (auto simp: comp_def) 

820 

821 
{ fix f g h M 

822 
assume f: "f \<in> measurable M S" and g: "g \<in> measurable M P" and h: "h \<in> measurable M (S \<Otimes>\<^isub>M P)" 

823 
from measurable_comp[OF h Pxz[THEN distributed_real_measurable]] 

824 
measurable_comp[OF f Px[THEN distributed_real_measurable]] 

825 
measurable_comp[OF g Pz[THEN distributed_real_measurable]] 

826 
have "(\<lambda>x. log b (Pxz (h x) / (Px (f x) * Pz (g x)))) \<in> borel_measurable M" 

827 
by (simp add: comp_def b_gt_1) } 

828 
note borel_log = this 

829 

830 
have measurable_cut: "(\<lambda>(x, y, z). (x, z)) \<in> measurable (S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P) (S \<Otimes>\<^isub>M P)" 

831 
by (auto simp add: split_beta' comp_def intro!: measurable_Pair measurable_snd') 

832 

833 
from Pxz Pxyz have distr_eq: "distr M (S \<Otimes>\<^isub>M P) (\<lambda>x. (X x, Z x)) = 

834 
distr (distr M (S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P) (\<lambda>x. (X x, Y x, Z x))) (S \<Otimes>\<^isub>M P) (\<lambda>(x, y, z). (x, z))" 

835 
by (subst distr_distr[OF measurable_cut]) (auto dest: distributed_measurable simp: comp_def) 

40859  836 

47694  837 
have "mutual_information b S P X Z = 
838 
(\<integral>x. Pxz x * log b (Pxz x / (Px (fst x) * Pz (snd x))) \<partial>(S \<Otimes>\<^isub>M P))" 

839 
by (rule mutual_information_distr[OF S P Px Pz Pxz]) 

840 
also have "\<dots> = (\<integral>(x,y,z). Pxyz (x,y,z) * log b (Pxz (x,z) / (Px x * Pz z)) \<partial>(S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P))" 

841 
using b_gt_1 Pxz Px Pz 

842 
by (subst distributed_transform_integral[OF Pxyz Pxz, where T="\<lambda>(x, y, z). (x, z)"]) 

843 
(auto simp: split_beta' intro!: measurable_Pair measurable_snd' measurable_snd'' measurable_fst'' borel_measurable_times 

844 
dest!: distributed_real_measurable) 

845 
finally have mi_eq: 

846 
"mutual_information b S P X Z = (\<integral>(x,y,z). Pxyz (x,y,z) * log b (Pxz (x,z) / (Px x * Pz z)) \<partial>(S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P))" . 

847 

848 
have "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. Px (fst x) = 0 \<longrightarrow> Pxyz x = 0" 

849 
by (intro subdensity_real[of fst, OF _ Pxyz Px]) auto 

850 
moreover have "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. Pz (snd (snd x)) = 0 \<longrightarrow> Pxyz x = 0" 

851 
by (intro subdensity_real[of "\<lambda>x. snd (snd x)", OF _ Pxyz Pz]) (auto intro: measurable_snd') 

852 
moreover have "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. Pxz (fst x, snd (snd x)) = 0 \<longrightarrow> Pxyz x = 0" 

853 
by (intro subdensity_real[of "\<lambda>x. (fst x, snd (snd x))", OF _ Pxyz Pxz]) (auto intro: measurable_Pair measurable_snd') 

854 
moreover have "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. Pyz (snd x) = 0 \<longrightarrow> Pxyz x = 0" 

855 
by (intro subdensity_real[of snd, OF _ Pxyz Pyz]) (auto intro: measurable_Pair) 

856 
moreover have "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. 0 \<le> Px (fst x)" 

857 
using Px by (intro STP.AE_pair_measure) (auto simp: comp_def intro!: measurable_fst'' dest: distributed_real_AE distributed_real_measurable) 

858 
moreover have "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. 0 \<le> Pyz (snd x)" 

859 
using Pyz by (intro STP.AE_pair_measure) (auto simp: comp_def intro!: measurable_snd'' dest: distributed_real_AE distributed_real_measurable) 

860 
moreover have "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. 0 \<le> Pz (snd (snd x))" 

861 
using Pz Pz[THEN distributed_real_measurable] by (auto intro!: measurable_snd'' TP.AE_pair_measure STP.AE_pair_measure AE_I2[of S] dest: distributed_real_AE) 

862 
moreover have "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. 0 \<le> Pxz (fst x, snd (snd x))" 

863 
using Pxz[THEN distributed_real_AE, THEN SP.AE_pair] 

864 
using measurable_comp[OF measurable_Pair[OF measurable_fst measurable_comp[OF measurable_snd measurable_snd]] Pxz[THEN distributed_real_measurable], of T] 

865 
using measurable_comp[OF measurable_snd measurable_Pair2[OF Pxz[THEN distributed_real_measurable]], of _ T] 

866 
by (auto intro!: TP.AE_pair_measure STP.AE_pair_measure simp: comp_def) 

867 
moreover note Pxyz[THEN distributed_real_AE] 

868 
ultimately have "AE x in S \<Otimes>\<^isub>M T \<Otimes>\<^isub>M P. 

869 
Pxyz x * log b (Pxyz x / (Px (fst x) * Pyz (snd x)))  

870 
Pxyz x * log b (Pxz (fst x, snd (snd x)) / (Px (fst x) * Pz (snd (snd x)))) = 

871 
Pxyz x * log b (Pxyz x * Pz (snd (snd x)) / (Pxz (fst x, snd (snd x)) * Pyz (snd x))) " 

872 
proof eventually_elim 

873 
case (goal1 x) 

874 
show ?case 

40859  875 
proof cases 
47694  876 
assume "Pxyz x \<noteq> 0" 
877 
with goal1 have "0 < Px (fst x)" "0 < Pz (snd (snd x))" "0 < Pxz (fst x, snd (snd x))" "0 < Pyz (snd x)" "0 < Pxyz x" 

878 
by auto 

879 
then show ?thesis 

880 
using b_gt_1 by (simp add: log_simps mult_pos_pos less_imp_le field_simps) 

40859  881 
qed simp 
882 
qed 

47694  883 
with I1 I2 show ?thesis 
40859  884 
unfolding conditional_mutual_information_def 
47694  885 
apply (subst mi_eq) 
886 
apply (subst mutual_information_distr[OF S TP Px Pyz Pxyz]) 

887 
apply (subst integral_diff(2)[symmetric]) 

888 
apply (auto intro!: integral_cong_AE simp: split_beta' simp del: integral_diff) 

889 
done 

40859  890 
qed 
891 

892 
lemma (in information_space) conditional_mutual_information_eq: 

47694  893 
assumes Pz: "simple_distributed M Z Pz" 
894 
assumes Pyz: "simple_distributed M (\<lambda>x. (Y x, Z x)) Pyz" 

895 
assumes Pxz: "simple_distributed M (\<lambda>x. (X x, Z x)) Pxz" 

896 
assumes Pxyz: "simple_distributed M (\<lambda>x. (X x, Y x, Z x)) Pxyz" 

897 
shows "\<I>(X ; Y  Z) = 

898 
(\<Sum>(x, y, z)\<in>(\<lambda>x. (X x, Y x, Z x))`space M. Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Pxz (x, z) * (Pyz (y,z) / Pz z))))" 

899 
proof (subst conditional_mutual_information_generic_eq[OF _ _ _ _ 

900 
simple_distributed[OF Pz] simple_distributed_joint[OF Pyz] simple_distributed_joint[OF Pxz] 

901 
simple_distributed_joint2[OF Pxyz]]) 

902 
note simple_distributed_joint2_finite[OF Pxyz, simp] 

903 
show "sigma_finite_measure (count_space (X ` space M))" 

904 
by (simp add: sigma_finite_measure_count_space_finite) 

905 
show "sigma_finite_measure (count_space (Y ` space M))" 

906 
by (simp add: sigma_finite_measure_count_space_finite) 

907 
show "sigma_finite_measure (count_space (Z ` space M))" 

908 
by (simp add: sigma_finite_measure_count_space_finite) 

909 
have "count_space (X ` space M) \<Otimes>\<^isub>M count_space (Y ` space M) \<Otimes>\<^isub>M count_space (Z ` space M) = 

910 
count_space (X`space M \<times> Y`space M \<times> Z`space M)" 

911 
(is "?P = ?C") 

912 
by (simp add: pair_measure_count_space) 

40859  913 

47694  914 
let ?Px = "\<lambda>x. measure M (X ` {x} \<inter> space M)" 
915 
have "(\<lambda>x. (X x, Z x)) \<in> measurable M (count_space (X ` space M) \<Otimes>\<^isub>M count_space (Z ` space M))" 

916 
using simple_distributed_joint[OF Pxz] by (rule distributed_measurable) 

917 
from measurable_comp[OF this measurable_fst] 

918 
have "random_variable (count_space (X ` space M)) X" 

919 
by (simp add: comp_def) 

920 
then have "simple_function M X" 

921 
unfolding simple_function_def by auto 

922 
then have "simple_distributed M X ?Px" 

923 
by (rule simple_distributedI) auto 

924 
then show "distributed M (count_space (X ` space M)) X ?Px" 

925 
by (rule simple_distributed) 

926 

927 
let ?f = "(\<lambda>x. if x \<in> (\<lambda>x. (X x, Y x, Z x)) ` space M then Pxyz x else 0)" 

928 
let ?g = "(\<lambda>x. if x \<in> (\<lambda>x. (Y x, Z x)) ` space M then Pyz x else 0)" 

929 
let ?h = "(\<lambda>x. if x \<in> (\<lambda>x. (X x, Z x)) ` space M then Pxz x else 0)" 

930 
show 

931 
"integrable ?P (\<lambda>(x, y, z). ?f (x, y, z) * log b (?f (x, y, z) / (?Px x * ?g (y, z))))" 

932 
"integrable ?P (\<lambda>(x, y, z). ?f (x, y, z) * log b (?h (x, z) / (?Px x * Pz z)))" 

933 
by (auto intro!: integrable_count_space simp: pair_measure_count_space) 

934 
let ?i = "\<lambda>x y z. ?f (x, y, z) * log b (?f (x, y, z) / (?h (x, z) * (?g (y, z) / Pz z)))" 

935 
let ?j = "\<lambda>x y z. Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Pxz (x, z) * (Pyz (y,z) / Pz z)))" 

936 
have "(\<lambda>(x, y, z). ?i x y z) = (\<lambda>x. if x \<in> (\<lambda>x. (X x, Y x, Z x)) ` space M then ?j (fst x) (fst (snd x)) (snd (snd x)) else 0)" 

937 
by (auto intro!: ext) 

938 
then show "(\<integral> (x, y, z). ?i x y z \<partial>?P) = (\<Sum>(x, y, z)\<in>(\<lambda>x. (X x, Y x, Z x)) ` space M. ?j x y z)" 

939 
by (auto intro!: setsum_cong simp add: `?P = ?C` lebesgue_integral_count_space_finite simple_distributed_finite setsum_cases split_beta') 

36624  940 
qed 
941 

47694  942 
lemma (in information_space) conditional_mutual_information_nonneg: 
943 
assumes X: "simple_function M X" and Y: "simple_function M Y" and Z: "simple_function M Z" 

944 
shows "0 \<le> \<I>(X ; Y  Z)" 

945 
proof  

946 
def Pz \<equiv> "\<lambda>x. if x \<in> Z`space M then measure M (Z ` {x} \<inter> space M) else 0" 

947 
def Pxz \<equiv> "\<lambda>x. if x \<in> (\<lambda>x. (X x, Z x))`space M then measure M ((\<lambda>x. (X x, Z x)) ` {x} \<inter> space M) else 0" 

948 
def Pyz \<equiv> "\<lambda>x. if x \<in> (\<lambda>x. (Y x, Z x))`space M then measure M ((\<lambda>x. (Y x, Z x)) ` {x} \<inter> space M) else 0" 

949 
def Pxyz \<equiv> "\<lambda>x. if x \<in> (\<lambda>x. (X x, Y x, Z x))`space M then measure M ((\<lambda>x. (X x, Y x, Z x)) ` {x} \<inter> space M) else 0" 

950 
let ?M = "X`space M \<times> Y`space M \<times> Z`space M" 

36624  951 

47694  952 
note XZ = simple_function_Pair[OF X Z] 
953 
note YZ = simple_function_Pair[OF Y Z] 

954 
note XYZ = simple_function_Pair[OF X simple_function_Pair[OF Y Z]] 

955 
have Pz: "simple_distributed M Z Pz" 

956 
using Z by (rule simple_distributedI) (auto simp: Pz_def) 

957 
have Pxz: "simple_distributed M (\<lambda>x. (X x, Z x)) Pxz" 

958 
using XZ by (rule simple_distributedI) (auto simp: Pxz_def) 

959 
have Pyz: "simple_distributed M (\<lambda>x. (Y x, Z x)) Pyz" 

960 
using YZ by (rule simple_distributedI) (auto simp: Pyz_def) 

961 
have Pxyz: "simple_distributed M (\<lambda>x. (X x, Y x, Z x)) Pxyz" 

962 
using XYZ by (rule simple_distributedI) (auto simp: Pxyz_def) 

40859  963 

47694  964 
{ fix z assume z: "z \<in> Z ` space M" then have "(\<Sum>x\<in>X ` space M. Pxz (x, z)) = Pz z" 
965 
using distributed_marginal_eq_joint_simple[OF X Pz Pxz z] 

966 
by (auto intro!: setsum_cong simp: Pxz_def) } 

967 
note marginal1 = this 

40859  968 

47694  969 
{ fix z assume z: "z \<in> Z ` space M" then have "(\<Sum>y\<in>Y ` space M. Pyz (y, z)) = Pz z" 
970 
using distributed_marginal_eq_joint_simple[OF Y Pz Pyz z] 

971 
by (auto intro!: setsum_cong simp: Pyz_def) } 

972 
note marginal2 = this 

973 

974 
have " \<I>(X ; Y  Z) =  (\<Sum>(x, y, z) \<in> ?M. Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Pxz (x, z) * (Pyz (y,z) / Pz z))))" 

975 
unfolding conditional_mutual_information_eq[OF Pz Pyz Pxz Pxyz] 

976 
using X Y Z by (auto intro!: setsum_mono_zero_left simp: Pxyz_def simple_functionD) 

977 
also have "\<dots> \<le> log b (\<Sum>(x, y, z) \<in> ?M. Pxz (x, z) * (Pyz (y,z) / Pz z))" 

41981
cdf7693bbe08
reworked Probability theory: measures are not type restricted to positive extended reals
hoelzl
parents:
41833
diff
changeset

978 
unfolding split_beta' 
36624  979 
proof (rule log_setsum_divide) 
47694  980 
show "?M \<noteq> {}" using not_empty by simp 
36624  981 
show "1 < b" using b_gt_1 . 
36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

982 

47694  983 
show "finite ?M" using X Y Z by (auto simp: simple_functionD) 
40859  984 

47694  985 
then show "(\<Sum>x\<in>?M. Pxyz (fst x, fst (snd x), snd (snd x))) = 1" 
986 
apply (subst Pxyz[THEN simple_distributed_setsum_space, symmetric]) 

987 
apply simp 

988 
apply (intro setsum_mono_zero_right) 

989 
apply (auto simp: Pxyz_def) 

990 
done 

991 
let ?N = "(\<lambda>x. (X x, Y x, Z x)) ` space M" 

992 
fix x assume x: "x \<in> ?M" 

993 
let ?Q = "Pxyz (fst x, fst (snd x), snd (snd x))" 

994 
let ?P = "Pxz (fst x, snd (snd x)) * (Pyz (fst (snd x), snd (snd x)) / Pz (snd (snd x)))" 

995 
from x show "0 \<le> ?Q" "0 \<le> ?P" 

996 
using Pxyz[THEN simple_distributed, THEN distributed_real_AE] 

997 
using Pxz[THEN simple_distributed, THEN distributed_real_AE] 

998 
using Pyz[THEN simple_distributed, THEN distributed_real_AE] 

999 
using Pz[THEN simple_distributed, THEN distributed_real_AE] 

1000 
by (auto intro!: mult_nonneg_nonneg divide_nonneg_nonneg simp: AE_count_space Pxyz_def Pxz_def Pyz_def Pz_def) 

1001 
moreover assume "0 < ?Q" 

1002 
moreover have "AE x in count_space ?N. Pz (snd (snd x)) = 0 \<longrightarrow> Pxyz x = 0" 

1003 
by (intro subdensity_real[of "\<lambda>x. snd (snd x)", OF _ Pxyz[THEN simple_distributed] Pz[THEN simple_distributed]]) (auto intro: measurable_snd') 

1004 
then have "\<And>x. Pz (snd (snd x)) = 0 \<longrightarrow> Pxyz x = 0" 

1005 
by (auto simp: Pz_def Pxyz_def AE_count_space) 

1006 
moreover have "AE x in count_space ?N. Pxz (fst x, snd (snd x)) = 0 \<longrightarrow> Pxyz x = 0" 

1007 
by (intro subdensity_real[of "\<lambda>x. (fst x, snd (snd x))", OF _ Pxyz[THEN simple_distributed] Pxz[THEN simple_distributed]]) (auto intro: measurable_Pair measurable_snd') 

1008 
then have "\<And>x. Pxz (fst x, snd (snd x)) = 0 \<longrightarrow> Pxyz x = 0" 

1009 
by (auto simp: Pz_def Pxyz_def AE_count_space) 

1010 
moreover have "AE x in count_space ?N. Pyz (snd x) = 0 \<longrightarrow> Pxyz x = 0" 

1011 
by (intro subdensity_real[of snd, OF _ Pxyz[THEN simple_distributed] Pyz[THEN simple_distributed]]) (auto intro: measurable_Pair) 

1012 
then have "\<And>x. Pyz (snd x) = 0 \<longrightarrow> Pxyz x = 0" 

1013 
by (auto simp: Pz_def Pxyz_def AE_count_space) 

1014 
ultimately show "0 < ?P" using x by (auto intro!: divide_pos_pos mult_pos_pos simp: less_le) 

40859  1015 
qed 
47694  1016 
also have "(\<Sum>(x, y, z) \<in> ?M. Pxz (x, z) * (Pyz (y,z) / Pz z)) = (\<Sum>z\<in>Z`space M. Pz z)" 
36624  1017 
apply (simp add: setsum_cartesian_product') 
1018 
apply (subst setsum_commute) 

1019 
apply (subst (2) setsum_commute) 

47694  1020 
apply (auto simp: setsum_divide_distrib[symmetric] setsum_product[symmetric] marginal1 marginal2 
36624  1021 
intro!: setsum_cong) 
47694  1022 
done 
1023 
also have "log b (\<Sum>z\<in>Z`space M. Pz z) = 0" 

1024 
using Pz[THEN simple_distributed_setsum_space] by simp 

40859  1025 
finally show ?thesis by simp 
36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

1026 
qed 
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

1027 

39097  1028 
subsection {* Conditional Entropy *} 
1029 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

1030 
definition (in prob_space) 
47694  1031 
"conditional_entropy b S T X Y = entropy b (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x))  entropy b T Y" 
36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

1032 

40859  1033 
abbreviation (in information_space) 
1034 
conditional_entropy_Pow ("\<H>'(_  _')") where 

47694  1035 
"\<H>(X  Y) \<equiv> conditional_entropy b (count_space (X`space M)) (count_space (Y`space M)) X Y" 
36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

1036 

40859  1037 
lemma (in information_space) conditional_entropy_generic_eq: 
47694  1038 
fixes Px :: "'b \<Rightarrow> real" and Py :: "'c \<Rightarrow> real" 
1039 
assumes S: "sigma_finite_measure S" and T: "sigma_finite_measure T" 

1040 
assumes Px: "distributed M S X Px" 

1041 
assumes Py: "distributed M T Y Py" 

1042 
assumes Pxy: "distributed M (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x)) Pxy" 

1043 
assumes I1: "integrable (S \<Otimes>\<^isub>M T) (\<lambda>x. Pxy x * log b (Pxy x))" 

1044 
assumes I2: "integrable (S \<Otimes>\<^isub>M T) (\<lambda>x. Pxy x * log b (Py (snd x)))" 

1045 
shows "conditional_entropy b S T X Y =  (\<integral>(x, y). Pxy (x, y) * log b (Pxy (x, y) / Py y) \<partial>(S \<Otimes>\<^isub>M T))" 

40859  1046 
proof  
47694  1047 
interpret S: sigma_finite_measure S by fact 
1048 
interpret T: sigma_finite_measure T by fact 

1049 
interpret ST: pair_sigma_finite S T .. 

1050 
have ST: "sigma_finite_measure (S \<Otimes>\<^isub>M T)" .. 

1051 

1052 
interpret Pxy: prob_space "density (S \<Otimes>\<^isub>M T) Pxy" 

1053 
unfolding Pxy[THEN distributed_distr_eq_density, symmetric] 

1054 
using Pxy[THEN distributed_measurable] by (rule prob_space_distr) 

1055 

1056 
from Py Pxy have distr_eq: "distr M T Y = 

1057 
distr (distr M (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x))) T snd" 

1058 
by (subst distr_distr[OF measurable_snd]) (auto dest: distributed_measurable simp: comp_def) 

1059 

1060 
have "entropy b T Y =  (\<integral>y. Py y * log b (Py y) \<partial>T)" 

1061 
by (rule entropy_distr[OF T Py]) 

1062 
also have "\<dots> =  (\<integral>(x,y). Pxy (x,y) * log b (Py y) \<partial>(S \<Otimes>\<^isub>M T))" 

1063 
using b_gt_1 Py[THEN distributed_real_measurable] 

1064 
by (subst distributed_transform_integral[OF Pxy Py, where T=snd]) (auto intro!: integral_cong) 

1065 
finally have e_eq: "entropy b T Y =  (\<integral>(x,y). Pxy (x,y) * log b (Py y) \<partial>(S \<Otimes>\<^isub>M T))" . 

1066 

1067 
have "AE x in S \<Otimes>\<^isub>M T. Px (fst x) = 0 \<longrightarrow> Pxy x = 0" 

1068 
by (intro subdensity_real[of fst, OF _ Pxy Px]) (auto intro: measurable_Pair) 

1069 
moreover have "AE x in S \<Otimes>\<^isub>M T. Py (snd x) = 0 \<longrightarrow> Pxy x = 0" 

1070 
by (intro subdensity_real[of snd, OF _ Pxy Py]) (auto intro: measurable_Pair) 

1071 
moreover have "AE x in S \<Otimes>\<^isub>M T. 0 \<le> Px (fst x)" 

1072 
using Px by (intro ST.AE_pair_measure) (auto simp: comp_def intro!: measurable_fst'' dest: distributed_real_AE distributed_real_measurable) 

1073 
moreover have "AE x in S \<Otimes>\<^isub>M T. 0 \<le> Py (snd x)" 

1074 
using Py by (intro ST.AE_pair_measure) (auto simp: comp_def intro!: measurable_snd'' dest: distributed_real_AE distributed_real_measurable) 

1075 
moreover note Pxy[THEN distributed_real_AE] 

1076 
ultimately have pos: "AE x in S \<Otimes>\<^isub>M T. 0 \<le> Pxy x \<and> 0 \<le> Px (fst x) \<and> 0 \<le> Py (snd x) \<and> 

1077 
(Pxy x = 0 \<or> (Pxy x \<noteq> 0 \<longrightarrow> 0 < Pxy x \<and> 0 < Px (fst x) \<and> 0 < Py (snd x)))" 

1078 
by eventually_elim auto 

1079 

1080 
from pos have "AE x in S \<Otimes>\<^isub>M T. 

1081 
Pxy x * log b (Pxy x)  Pxy x * log b (Py (snd x)) = Pxy x * log b (Pxy x / Py (snd x))" 

1082 
by eventually_elim (auto simp: log_simps mult_pos_pos field_simps b_gt_1) 

1083 
with I1 I2 show ?thesis 

40859  1084 
unfolding conditional_entropy_def 
47694  1085 
apply (subst e_eq) 
1086 
apply (subst entropy_distr[OF ST Pxy]) 

1087 
unfolding minus_diff_minus 

1088 
apply (subst integral_diff(2)[symmetric]) 

1089 
apply (auto intro!: integral_cong_AE simp: split_beta' simp del: integral_diff) 

1090 
done 

39097  1091 
qed 
1092 

40859  1093 
lemma (in information_space) conditional_entropy_eq: 
47694  1094 
assumes Y: "simple_distributed M Y Py" and X: "simple_function M X" 
1095 
assumes XY: "simple_distributed M (\<lambda>x. (X x, Y x)) Pxy" 

1096 
shows "\<H>(X  Y) =  (\<Sum>(x, y)\<in>(\<lambda>x. (X x, Y x)) ` space M. Pxy (x, y) * log b (Pxy (x, y) / Py y))" 

1097 
proof (subst conditional_entropy_generic_eq[OF _ _ 

1098 
simple_distributed[OF simple_distributedI[OF X refl]] simple_distributed[OF Y] simple_distributed_joint[OF XY]]) 

1099 
have [simp]: "finite (X`space M)" using X by (simp add: simple_function_def) 

1100 
note Y[THEN simple_distributed_finite, simp] 

1101 
show "sigma_finite_measure (count_space (X ` space M))" 

1102 
by (simp add: sigma_finite_measure_count_space_finite) 

1103 
show "sigma_finite_measure (count_space (Y ` space M))" 

1104 
by (simp add: sigma_finite_measure_count_space_finite) 

1105 
let ?f = "(\<lambda>x. if x \<in> (\<lambda>x. (X x, Y x)) ` space M then Pxy x else 0)" 

1106 
have "count_space (X ` space M) \<Otimes>\<^isub>M count_space (Y ` space M) = count_space (X`space M \<times> Y`space M)" 

1107 
(is "?P = ?C") 

1108 
using X Y by (simp add: simple_distributed_finite pair_measure_count_space) 

1109 
with X Y show 

1110 
"integrable ?P (\<lambda>x. ?f x * log b (?f x))" 

1111 
"integrable ?P (\<lambda>x. ?f x * log b (Py (snd x)))" 

1112 
by (auto intro!: integrable_count_space simp: simple_distributed_finite) 

1113 
have eq: "(\<lambda>(x, y). ?f (x, y) * log b (?f (x, y) / Py y)) = 

1114 
(\<lambda>x. if x \<in> (\<lambda>x. (X x, Y x)) ` space M then Pxy x * log b (Pxy x / Py (snd x)) else 0)" 

1115 
by auto 

1116 
from X Y show " (\<integral> (x, y). ?f (x, y) * log b (?f (x, y) / Py y) \<partial>?P) = 

1117 
 (\<Sum>(x, y)\<in>(\<lambda>x. (X x, Y x)) ` space M. Pxy (x, y) * log b (Pxy (x, y) / Py y))" 

1118 
by (auto intro!: setsum_cong simp add: `?P = ?C` lebesgue_integral_count_space_finite simple_distributed_finite eq setsum_cases split_beta') 

1119 
qed 

39097  1120 

47694  1121 
lemma (in information_space) conditional_mutual_information_eq_conditional_entropy: 
41689
3e39b0e730d6
the measure valuation is again part of the measure_space type, instead of an explicit parameter to the locale;
hoelzl
parents:
41661
diff
changeset

1122 
assumes X: "simple_function M X" and Y: "simple_function M Y" 
47694  1123 
shows "\<I>(X ; X  Y) = \<H>(X  Y)" 
1124 
proof  

1125 
def Py \<equiv> "\<lambda>x. if x \<in> Y`space M then measure M (Y ` {x} \<inter> space M) else 0" 

1126 
def Pxy \<equiv> "\<lambda>x. if x \<in> (\<lambda>x. (X x, Y x))`space M then measure M ((\<lambda>x. (X x, Y x)) ` {x} \<inter> space M) else 0" 

1127 
def Pxxy \<equiv> "\<lambda>x. if x \<in> (\<lambda>x. (X x, X x, Y x))`space M then measure M ((\<lambda>x. (X x, X x, Y x)) ` {x} \<inter> space M) else 0" 

1128 
let ?M = "X`space M \<times> X`space M \<times> Y`space M" 

39097  1129 

47694  1130 
note XY = simple_function_Pair[OF X Y] 
1131 
note XXY = simple_function_Pair[OF X XY] 

1132 
have Py: "simple_distributed M Y Py" 

1133 
using Y by (rule simple_distributedI) (auto simp: Py_def) 

1134 
have Pxy: "simple_distributed M (\<lambda>x. (X x, Y x)) Pxy" 

1135 
using XY by (rule simple_distributedI) (auto simp: Pxy_def) 

1136 
have Pxxy: "simple_distributed M (\<lambda>x. (X x, X x, Y x)) Pxxy" 

1137 
using XXY by (rule simple_distributedI) (auto simp: Pxxy_def) 

1138 
have eq: "(\<lambda>x. (X x, X x, Y x)) ` space M = (\<lambda>(x, y). (x, x, y)) ` (\<lambda>x. (X x, Y x)) ` space M" 

1139 
by auto 

1140 
have inj: "\<And>A. inj_on (\<lambda>(x, y). (x, x, y)) A" 

1141 
by (auto simp: inj_on_def) 

1142 
have Pxxy_eq: "\<And>x y. Pxxy (x, x, y) = Pxy (x, y)" 

1143 
by (auto simp: Pxxy_def Pxy_def intro!: arg_cong[where f=prob]) 

1144 
have "AE x in count_space ((\<lambda>x. (X x, Y x))`space M). Py (snd x) = 0 \<longrightarrow> Pxy x = 0" 

1145 
by (intro subdensity_real[of snd, OF _ Pxy[THEN simple_distributed] Py[THEN simple_distributed]]) (auto intro: measurable_Pair) 

1146 
then show ?thesis 

1147 
apply (subst conditional_mutual_information_eq[OF Py Pxy Pxy Pxxy]) 

1148 
apply (subst conditional_entropy_eq[OF Py X Pxy]) 

1149 
apply (auto intro!: setsum_cong simp: Pxxy_eq setsum_negf[symmetric] eq setsum_reindex[OF inj] 

1150 
log_simps zero_less_mult_iff zero_le_mult_iff field_simps mult_less_0_iff AE_count_space) 

1151 
using Py[THEN simple_distributed, THEN distributed_real_AE] Pxy[THEN simple_distributed, THEN distributed_real_AE] 

1152 
apply (auto simp add: not_le[symmetric] AE_count_space) 

1153 
done 

1154 
qed 

1155 

1156 
lemma (in information_space) conditional_entropy_nonneg: 

1157 
assumes X: "simple_function M X" and Y: "simple_function M Y" shows "0 \<le> \<H>(X  Y)" 

1158 
using conditional_mutual_information_eq_conditional_entropy[OF X Y] conditional_mutual_information_nonneg[OF X X Y] 

1159 
by simp 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

1160 

39097  1161 
subsection {* Equalities *} 
1162 

47694  1163 
lemma (in information_space) mutual_information_eq_entropy_conditional_entropy_distr: 
1164 
fixes Px :: "'b \<Rightarrow> real" and Py :: "'c \<Rightarrow> real" and Pxy :: "('b \<times> 'c) \<Rightarrow> real" 

1165 
assumes S: "sigma_finite_measure S" and T: "sigma_finite_measure T" 

1166 
assumes Px: "distributed M S X Px" and Py: "distributed M T Y Py" 

1167 
assumes Pxy: "distributed M (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x)) Pxy" 

1168 
assumes Ix: "integrable(S \<Otimes>\<^isub>M T) (\<lambda>x. Pxy x * log b (Px (fst x)))" 

1169 
assumes Iy: "integrable(S \<Otimes>\<^isub>M T) (\<lambda>x. Pxy x * log b (Py (snd x)))" 

1170 
assumes Ixy: "integrable(S \<Otimes>\<^isub>M T) (\<lambda>x. Pxy x * log b (Pxy x))" 

1171 
shows "mutual_information b S T X Y = entropy b S X + entropy b T Y  entropy b (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x))" 

40859  1172 
proof  
47694  1173 
have X: "entropy b S X =  (\<integral>x. Pxy x * log b (Px (fst x)) \<partial>(S \<Otimes>\<^isub>M T))" 
1174 
using b_gt_1 Px[THEN distributed_real_measurable] 

1175 
apply (subst entropy_distr[OF S Px]) 

1176 
apply (subst distributed_transform_integral[OF Pxy Px, where T=fst]) 

1177 
apply (auto intro!: integral_cong) 

1178 
done 

1179 

1180 
have Y: "entropy b T Y =  (\<integral>x. Pxy x * log b (Py (snd x)) \<partial>(S \<Otimes>\<^isub>M T))" 

1181 
using b_gt_1 Py[THEN distributed_real_measurable] 

1182 
apply (subst entropy_distr[OF T Py]) 

1183 
apply (subst distributed_transform_integral[OF Pxy Py, where T=snd]) 

1184 
apply (auto intro!: integral_cong) 

1185 
done 

1186 

1187 
interpret S: sigma_finite_measure S by fact 

1188 
interpret T: sigma_finite_measure T by fact 

1189 
interpret ST: pair_sigma_finite S T .. 

1190 
have ST: "sigma_finite_measure (S \<Otimes>\<^isub>M T)" .. 

1191 

1192 
have XY: "entropy b (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x)) =  (\<integral>x. Pxy x * log b (Pxy x) \<partial>(S \<Otimes>\<^isub>M T))" 

1193 
by (subst entropy_distr[OF ST Pxy]) (auto intro!: integral_cong) 

1194 

1195 
have "AE x in S \<Otimes>\<^isub>M T. Px (fst x) = 0 \<longrightarrow> Pxy x = 0" 

1196 
by (intro subdensity_real[of fst, OF _ Pxy Px]) (auto intro: measurable_Pair) 

1197 
moreover have "AE x in S \<Otimes>\<^isub>M T. Py (snd x) = 0 \<longrightarrow> Pxy x = 0" 

1198 
by (intro subdensity_real[of snd, OF _ Pxy Py]) (auto intro: measurable_Pair) 

1199 
moreover have "AE x in S \<Otimes>\<^isub>M T. 0 \<le> Px (fst x)" 

1200 
using Px by (intro ST.AE_pair_measure) (auto simp: comp_def intro!: measurable_fst'' dest: distributed_real_AE distributed_real_measurable) 

1201 
moreover have "AE x in S \<Otimes>\<^isub>M T. 0 \<le> Py (snd x)" 

1202 
using Py by (intro ST.AE_pair_measure) (auto simp: comp_def intro!: measurable_snd'' dest: distributed_real_AE distributed_real_measurable) 

1203 
moreover note Pxy[THEN distributed_real_AE] 

1204 
ultimately have "AE x in S \<Otimes>\<^isub>M T. Pxy x * log b (Pxy x)  Pxy x * log b (Px (fst x))  Pxy x * log b (Py (snd x)) = 

1205 
Pxy x * log b (Pxy x / (Px (fst x) * Py (snd x)))" 

1206 
(is "AE x in _. ?f x = ?g x") 

1207 
proof eventually_elim 

1208 
case (goal1 x) 

1209 
show ?case 

1210 
proof cases 

1211 
assume "Pxy x \<noteq> 0" 

1212 
with goal1 have "0 < Px (fst x)" "0 < Py (snd x)" "0 < Pxy x" 

1213 
by auto 

1214 
then show ?thesis 

1215 
using b_gt_1 by (simp add: log_simps mult_pos_pos less_imp_le field_simps) 

1216 
qed simp 

1217 
qed 

1218 

1219 
have "entropy b S X + entropy b T Y  entropy b (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x)) = integral\<^isup>L (S \<Otimes>\<^isub>M T) ?f" 

1220 
unfolding X Y XY 

1221 
apply (subst integral_diff) 

1222 
apply (intro integral_diff Ixy Ix Iy)+ 

1223 
apply (subst integral_diff) 

1224 
apply (intro integral_diff Ixy Ix Iy)+ 

1225 
apply (simp add: field_simps) 

1226 
done 

1227 
also have "\<dots> = integral\<^isup>L (S \<Otimes>\<^isub>M T) ?g" 

1228 
using `AE x in _. ?f x = ?g x` by (rule integral_cong_AE) 

1229 
also have "\<dots> = mutual_information b S T X Y" 

1230 
by (rule mutual_information_distr[OF S T Px Py Pxy, symmetric]) 

1231 
finally show ?thesis .. 

1232 
qed 

1233 

1234 
lemma (in information_space) mutual_information_eq_entropy_conditional_entropy: 

1235 
assumes sf_X: "simple_function M X" and sf_Y: "simple_function M Y" 

1236 
shows "\<I>(X ; Y) = \<H>(X)  \<H>(X  Y)" 

1237 
proof  

1238 
have X: "simple_distributed M X (\<lambda>x. measure M (X ` {x} \<inter> space M))" 

1239 
using sf_X by (rule simple_distributedI) auto 

1240 
have Y: "simple_distributed M Y (\<lambda>x. measure M (Y ` {x} \<inter> space M))" 
