author  hoelzl 
Thu, 02 Sep 2010 19:51:53 +0200  
changeset 39097  943c7b348524 
parent 39092  98de40859858 
child 39198  f967a16dfcdd 
permissions  rwrr 
36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

1 
theory Information 
38656  2 
imports Probability_Space Product_Measure Convex Radon_Nikodym 
36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

3 
begin 
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

4 

39097  5 
lemma log_le: "1 < a \<Longrightarrow> 0 < x \<Longrightarrow> x \<le> y \<Longrightarrow> log a x \<le> log a y" 
6 
by (subst log_le_cancel_iff) auto 

7 

8 
lemma log_less: "1 < a \<Longrightarrow> 0 < x \<Longrightarrow> x < y \<Longrightarrow> log a x < log a y" 

9 
by (subst log_less_cancel_iff) auto 

10 

11 
lemma setsum_cartesian_product': 

12 
"(\<Sum>x\<in>A \<times> B. f x) = (\<Sum>x\<in>A. setsum (\<lambda>y. f (x, y)) B)" 

13 
unfolding setsum_cartesian_product by simp 

14 

38656  15 
lemma real_of_pinfreal_inverse[simp]: 
16 
fixes X :: pinfreal 

17 
shows "real (inverse X) = 1 / real X" 

18 
by (cases X) (auto simp: inverse_eq_divide) 

19 

39097  20 
lemma (in finite_prob_space) finite_product_prob_space_of_images: 
21 
"finite_prob_space \<lparr> space = X ` space M \<times> Y ` space M, sets = Pow (X ` space M \<times> Y ` space M)\<rparr> 

22 
(joint_distribution X Y)" 

23 
(is "finite_prob_space ?S _") 

24 
proof (simp add: finite_prob_space_eq finite_product_measure_space_of_images) 

25 
have "X ` X ` space M \<inter> Y ` Y ` space M \<inter> space M = space M" by auto 

26 
thus "joint_distribution X Y (X ` space M \<times> Y ` space M) = 1" 

27 
by (simp add: distribution_def prob_space vimage_Times comp_def measure_space_1) 

28 
qed 

29 

30 
lemma (in finite_prob_space) finite_measure_space_prod: 

31 
assumes X: "finite_measure_space MX (distribution X)" 

32 
assumes Y: "finite_measure_space MY (distribution Y)" 

33 
shows "finite_measure_space (prod_measure_space MX MY) (joint_distribution X Y)" 

34 
(is "finite_measure_space ?M ?D") 

35 
proof (intro finite_measure_spaceI) 

36 
interpret X: finite_measure_space MX "distribution X" by fact 

37 
interpret Y: finite_measure_space MY "distribution Y" by fact 

38 
note finite_measure_space.finite_prod_measure_space[OF X Y, simp] 

39 
show "finite (space ?M)" using X.finite_space Y.finite_space by auto 

40 
show "joint_distribution X Y {} = 0" by simp 

41 
show "sets ?M = Pow (space ?M)" by simp 

42 
{ fix x show "?D (space ?M) \<noteq> \<omega>" by (rule distribution_finite) } 

43 
{ fix A B assume "A \<subseteq> space ?M" "B \<subseteq> space ?M" "A \<inter> B = {}" 

44 
have *: "(\<lambda>t. (X t, Y t)) ` (A \<union> B) \<inter> space M = 

45 
(\<lambda>t. (X t, Y t)) ` A \<inter> space M \<union> (\<lambda>t. (X t, Y t)) ` B \<inter> space M" 

46 
by auto 

47 
show "?D (A \<union> B) = ?D A + ?D B" unfolding distribution_def * 

48 
apply (rule measure_additive[symmetric]) 

49 
using `A \<inter> B = {}` by (auto simp: sets_eq_Pow) } 

50 
qed 

51 

36624  52 
section "Convex theory" 
36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

53 

36624  54 
lemma log_setsum: 
55 
assumes "finite s" "s \<noteq> {}" 

56 
assumes "b > 1" 

57 
assumes "(\<Sum> i \<in> s. a i) = 1" 

58 
assumes "\<And> i. i \<in> s \<Longrightarrow> a i \<ge> 0" 

59 
assumes "\<And> i. i \<in> s \<Longrightarrow> y i \<in> {0 <..}" 

60 
shows "log b (\<Sum> i \<in> s. a i * y i) \<ge> (\<Sum> i \<in> s. a i * log b (y i))" 

61 
proof  

62 
have "convex_on {0 <..} (\<lambda> x.  log b x)" 

63 
by (rule minus_log_convex[OF `b > 1`]) 

64 
hence " log b (\<Sum> i \<in> s. a i * y i) \<le> (\<Sum> i \<in> s. a i *  log b (y i))" 

65 
using convex_on_setsum[of _ _ "\<lambda> x.  log b x"] assms pos_is_convex by fastsimp 

66 
thus ?thesis by (auto simp add:setsum_negf le_imp_neg_le) 

67 
qed 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

68 

36624  69 
lemma log_setsum': 
70 
assumes "finite s" "s \<noteq> {}" 

71 
assumes "b > 1" 

72 
assumes "(\<Sum> i \<in> s. a i) = 1" 

73 
assumes pos: "\<And> i. i \<in> s \<Longrightarrow> 0 \<le> a i" 

74 
"\<And> i. \<lbrakk> i \<in> s ; 0 < a i \<rbrakk> \<Longrightarrow> 0 < y i" 

75 
shows "log b (\<Sum> i \<in> s. a i * y i) \<ge> (\<Sum> i \<in> s. a i * log b (y i))" 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

76 
proof  
36624  77 
have "\<And>y. (\<Sum> i \<in> s  {i. a i = 0}. a i * y i) = (\<Sum> i \<in> s. a i * y i)" 
78 
using assms by (auto intro!: setsum_mono_zero_cong_left) 

79 
moreover have "log b (\<Sum> i \<in> s  {i. a i = 0}. a i * y i) \<ge> (\<Sum> i \<in> s  {i. a i = 0}. a i * log b (y i))" 

80 
proof (rule log_setsum) 

81 
have "setsum a (s  {i. a i = 0}) = setsum a s" 

82 
using assms(1) by (rule setsum_mono_zero_cong_left) auto 

83 
thus sum_1: "setsum a (s  {i. a i = 0}) = 1" 

84 
"finite (s  {i. a i = 0})" using assms by simp_all 

85 

86 
show "s  {i. a i = 0} \<noteq> {}" 

87 
proof 

88 
assume *: "s  {i. a i = 0} = {}" 

89 
hence "setsum a (s  {i. a i = 0}) = 0" by (simp add: * setsum_empty) 

90 
with sum_1 show False by simp 

38656  91 
qed 
36624  92 

93 
fix i assume "i \<in> s  {i. a i = 0}" 

94 
hence "i \<in> s" "a i \<noteq> 0" by simp_all 

95 
thus "0 \<le> a i" "y i \<in> {0<..}" using pos[of i] by auto 

96 
qed fact+ 

97 
ultimately show ?thesis by simp 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

98 
qed 
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

99 

36624  100 
lemma log_setsum_divide: 
101 
assumes "finite S" and "S \<noteq> {}" and "1 < b" 

102 
assumes "(\<Sum>x\<in>S. g x) = 1" 

103 
assumes pos: "\<And>x. x \<in> S \<Longrightarrow> g x \<ge> 0" "\<And>x. x \<in> S \<Longrightarrow> f x \<ge> 0" 

104 
assumes g_pos: "\<And>x. \<lbrakk> x \<in> S ; 0 < g x \<rbrakk> \<Longrightarrow> 0 < f x" 

105 
shows " (\<Sum>x\<in>S. g x * log b (g x / f x)) \<le> log b (\<Sum>x\<in>S. f x)" 

106 
proof  

107 
have log_mono: "\<And>x y. 0 < x \<Longrightarrow> x \<le> y \<Longrightarrow> log b x \<le> log b y" 

108 
using `1 < b` by (subst log_le_cancel_iff) auto 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

109 

36624  110 
have " (\<Sum>x\<in>S. g x * log b (g x / f x)) = (\<Sum>x\<in>S. g x * log b (f x / g x))" 
111 
proof (unfold setsum_negf[symmetric], rule setsum_cong) 

112 
fix x assume x: "x \<in> S" 

113 
show " (g x * log b (g x / f x)) = g x * log b (f x / g x)" 

114 
proof (cases "g x = 0") 

115 
case False 

116 
with pos[OF x] g_pos[OF x] have "0 < f x" "0 < g x" by simp_all 

117 
thus ?thesis using `1 < b` by (simp add: log_divide field_simps) 

118 
qed simp 

119 
qed rule 

120 
also have "... \<le> log b (\<Sum>x\<in>S. g x * (f x / g x))" 

121 
proof (rule log_setsum') 

122 
fix x assume x: "x \<in> S" "0 < g x" 

123 
with g_pos[OF x] show "0 < f x / g x" by (safe intro!: divide_pos_pos) 

124 
qed fact+ 

125 
also have "... = log b (\<Sum>x\<in>S  {x. g x = 0}. f x)" using `finite S` 

126 
by (auto intro!: setsum_mono_zero_cong_right arg_cong[where f="log b"] 

127 
split: split_if_asm) 

128 
also have "... \<le> log b (\<Sum>x\<in>S. f x)" 

129 
proof (rule log_mono) 

130 
have "0 = (\<Sum>x\<in>S  {x. g x = 0}. 0)" by simp 

131 
also have "... < (\<Sum>x\<in>S  {x. g x = 0}. f x)" (is "_ < ?sum") 

132 
proof (rule setsum_strict_mono) 

133 
show "finite (S  {x. g x = 0})" using `finite S` by simp 

134 
show "S  {x. g x = 0} \<noteq> {}" 

135 
proof 

136 
assume "S  {x. g x = 0} = {}" 

137 
hence "(\<Sum>x\<in>S. g x) = 0" by (subst setsum_0') auto 

138 
with `(\<Sum>x\<in>S. g x) = 1` show False by simp 

139 
qed 

140 
fix x assume "x \<in> S  {x. g x = 0}" 

141 
thus "0 < f x" using g_pos[of x] pos(1)[of x] by auto 

142 
qed 

143 
finally show "0 < ?sum" . 

144 
show "(\<Sum>x\<in>S  {x. g x = 0}. f x) \<le> (\<Sum>x\<in>S. f x)" 

145 
using `finite S` pos by (auto intro!: setsum_mono2) 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

146 
qed 
36624  147 
finally show ?thesis . 
36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

148 
qed 
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

149 

39097  150 
lemma split_pairs: 
151 
shows 

152 
"((A, B) = X) \<longleftrightarrow> (fst X = A \<and> snd X = B)" and 

153 
"(X = (A, B)) \<longleftrightarrow> (fst X = A \<and> snd X = B)" by auto 

38656  154 

155 
section "Information theory" 

156 

157 
locale finite_information_space = finite_prob_space + 

158 
fixes b :: real assumes b_gt_1: "1 < b" 

159 

160 
context finite_information_space 

161 
begin 

162 

163 
lemma 

164 
assumes "0 \<le> A" and pos: "0 < A \<Longrightarrow> 0 < B" "0 < A \<Longrightarrow> 0 < C" 

165 
shows mult_log_mult: "A * log b (B * C) = A * log b B + A * log b C" (is "?mult") 

166 
and mult_log_divide: "A * log b (B / C) = A * log b B  A * log b C" (is "?div") 

36624  167 
proof  
38656  168 
have "?mult \<and> ?div" 
169 
proof (cases "A = 0") 

170 
case False 

171 
hence "0 < A" using `0 \<le> A` by auto 

172 
with pos[OF this] show "?mult \<and> ?div" using b_gt_1 

173 
by (auto simp: log_divide log_mult field_simps) 

174 
qed simp 

175 
thus ?mult and ?div by auto 

176 
qed 

177 

178 
ML {* 

179 

180 
(* tactic to solve equations of the form @{term "W * log b (X / (Y * Z)) = W * log b X  W * log b (Y * Z)"} 

181 
where @{term W} is a joint distribution of @{term X}, @{term Y}, and @{term Z}. *) 

182 

183 
val mult_log_intros = [@{thm mult_log_divide}, @{thm mult_log_mult}] 

184 
val intros = [@{thm divide_pos_pos}, @{thm mult_pos_pos}, @{thm real_pinfreal_nonneg}, 

185 
@{thm real_distribution_divide_pos_pos}, 

186 
@{thm real_distribution_mult_inverse_pos_pos}, 

187 
@{thm real_distribution_mult_pos_pos}] 

188 

189 
val distribution_gt_0_tac = (rtac @{thm distribution_mono_gt_0} 

190 
THEN' assume_tac 

191 
THEN' clarsimp_tac (clasimpset_of @{context} addsimps2 @{thms split_pairs})) 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

192 

38656  193 
val distr_mult_log_eq_tac = REPEAT_ALL_NEW (CHANGED o TRY o 
194 
(resolve_tac (mult_log_intros @ intros) 

195 
ORELSE' distribution_gt_0_tac 

196 
ORELSE' clarsimp_tac (clasimpset_of @{context}))) 

197 

198 
fun instanciate_term thy redex intro = 

199 
let 

200 
val intro_concl = Thm.concl_of intro 

201 

202 
val lhs = intro_concl > HOLogic.dest_Trueprop > HOLogic.dest_eq > fst 

203 

204 
val m = SOME (Pattern.match thy (lhs, redex) (Vartab.empty, Vartab.empty)) 

205 
handle Pattern.MATCH => NONE 

206 

207 
in 

208 
Option.map (fn m => Envir.subst_term m intro_concl) m 

209 
end 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

210 

38656  211 
fun mult_log_simproc simpset redex = 
212 
let 

213 
val ctxt = Simplifier.the_context simpset 

214 
val thy = ProofContext.theory_of ctxt 

215 
fun prove (SOME thm) = (SOME 

216 
(Goal.prove ctxt [] [] thm (K (distr_mult_log_eq_tac 1)) 

217 
> mk_meta_eq) 

218 
handle THM _ => NONE) 

219 
 prove NONE = NONE 

220 
in 

221 
get_first (instanciate_term thy (term_of redex) #> prove) mult_log_intros 

222 
end 

223 
*} 

224 

225 
simproc_setup mult_log ("real (distribution X x) * log b (A * B)"  

226 
"real (distribution X x) * log b (A / B)") = {* K mult_log_simproc *} 

227 

228 
end 

229 

39097  230 
subsection "Kullback$$Leibler divergence" 
36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

231 

39097  232 
text {* The Kullback$$Leibler divergence is also known as relative entropy or 
233 
Kullback$$Leibler distance. *} 

234 

235 
definition 

236 
"KL_divergence b M \<mu> \<nu> = 

237 
measure_space.integral M \<mu> (\<lambda>x. log b (real (sigma_finite_measure.RN_deriv M \<nu> \<mu> x)))" 

38656  238 

239 
lemma (in finite_measure_space) KL_divergence_eq_finite: 

240 
assumes v: "finite_measure_space M \<nu>" 

241 
assumes ac: "\<forall>x\<in>space M. \<mu> {x} = 0 \<longrightarrow> \<nu> {x} = 0" 

242 
shows "KL_divergence b M \<nu> \<mu> = (\<Sum>x\<in>space M. real (\<nu> {x}) * log b (real (\<nu> {x}) / real (\<mu> {x})))" (is "_ = ?sum") 

243 
proof (simp add: KL_divergence_def finite_measure_space.integral_finite_singleton[OF v]) 

244 
interpret v: finite_measure_space M \<nu> by fact 

245 
have ms: "measure_space M \<nu>" by fact 

246 
have ac: "absolutely_continuous \<nu>" 

247 
using ac by (auto intro!: absolutely_continuousI[OF v]) 

248 
show "(\<Sum>x \<in> space M. log b (real (RN_deriv \<nu> x)) * real (\<nu> {x})) = ?sum" 

249 
using RN_deriv_finite_measure[OF ms ac] 

250 
by (auto intro!: setsum_cong simp: field_simps real_of_pinfreal_mult[symmetric]) 

251 
qed 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

252 

38656  253 
lemma (in finite_prob_space) KL_divergence_positive_finite: 
254 
assumes v: "finite_prob_space M \<nu>" 

255 
assumes ac: "\<And>x. \<lbrakk> x \<in> space M ; \<mu> {x} = 0 \<rbrakk> \<Longrightarrow> \<nu> {x} = 0" 

256 
and "1 < b" 

257 
shows "0 \<le> KL_divergence b M \<nu> \<mu>" 

258 
proof  

259 
interpret v: finite_prob_space M \<nu> using v . 

260 

261 
have *: "space M \<noteq> {}" using not_empty by simp 

262 

263 
hence " (KL_divergence b M \<nu> \<mu>) \<le> log b (\<Sum>x\<in>space M. real (\<mu> {x}))" 

264 
proof (subst KL_divergence_eq_finite) 

265 
show "finite_measure_space M \<nu>" by fact 

266 

267 
show "\<forall>x\<in>space M. \<mu> {x} = 0 \<longrightarrow> \<nu> {x} = 0" using ac by auto 

268 
show " (\<Sum>x\<in>space M. real (\<nu> {x}) * log b (real (\<nu> {x}) / real (\<mu> {x}))) \<le> log b (\<Sum>x\<in>space M. real (\<mu> {x}))" 

269 
proof (safe intro!: log_setsum_divide *) 

270 
show "finite (space M)" using finite_space by simp 

271 
show "1 < b" by fact 

272 
show "(\<Sum>x\<in>space M. real (\<nu> {x})) = 1" using v.finite_sum_over_space_eq_1 by simp 

273 

274 
fix x assume x: "x \<in> space M" 

275 
{ assume "0 < real (\<nu> {x})" 

276 
hence "\<mu> {x} \<noteq> 0" using ac[OF x] by auto 

39097  277 
thus "0 < prob {x}" using finite_measure[of "{x}"] sets_eq_Pow x 
38656  278 
by (cases "\<mu> {x}") simp_all } 
279 
qed auto 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

280 
qed 
38656  281 
thus "0 \<le> KL_divergence b M \<nu> \<mu>" using finite_sum_over_space_eq_1 by simp 
36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

282 
qed 
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

283 

39097  284 
subsection {* Mutual Information *} 
285 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

286 
definition (in prob_space) 
38656  287 
"mutual_information b S T X Y = 
288 
KL_divergence b (prod_measure_space S T) 

289 
(joint_distribution X Y) 

290 
(prod_measure S (distribution X) T (distribution Y))" 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

291 

36624  292 
abbreviation (in finite_information_space) 
293 
finite_mutual_information ("\<I>'(_ ; _')") where 

294 
"\<I>(X ; Y) \<equiv> mutual_information b 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

295 
\<lparr> space = X`space M, sets = Pow (X`space M) \<rparr> 
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

296 
\<lparr> space = Y`space M, sets = Pow (Y`space M) \<rparr> X Y" 
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

297 

39092  298 
lemma (in finite_information_space) mutual_information_generic_eq: 
299 
assumes MX: "finite_measure_space MX (distribution X)" 

300 
assumes MY: "finite_measure_space MY (distribution Y)" 

301 
shows "mutual_information b MX MY X Y = (\<Sum> (x,y) \<in> space MX \<times> space MY. 

302 
real (joint_distribution X Y {(x,y)}) * 

303 
log b (real (joint_distribution X Y {(x,y)}) / 

304 
(real (distribution X {x}) * real (distribution Y {y}))))" 

305 
proof  

306 
let ?P = "prod_measure_space MX MY" 

307 
let ?\<mu> = "prod_measure MX (distribution X) MY (distribution Y)" 

308 
let ?\<nu> = "joint_distribution X Y" 

309 
interpret X: finite_measure_space MX "distribution X" by fact 

310 
moreover interpret Y: finite_measure_space MY "distribution Y" by fact 

311 
have fms: "finite_measure_space MX (distribution X)" 

312 
"finite_measure_space MY (distribution Y)" by fact+ 

313 
have fms_P: "finite_measure_space ?P ?\<mu>" 

314 
by (rule X.finite_measure_space_finite_prod_measure) fact 

315 
then interpret P: finite_measure_space ?P ?\<mu> . 

316 
have fms_P': "finite_measure_space ?P ?\<nu>" 

317 
using finite_product_measure_space[of "space MX" "space MY"] 

318 
X.finite_space Y.finite_space sigma_prod_sets_finite[OF X.finite_space Y.finite_space] 

319 
X.sets_eq_Pow Y.sets_eq_Pow 

320 
by (simp add: prod_measure_space_def sigma_def) 

321 
then interpret P': finite_measure_space ?P ?\<nu> . 

322 
{ fix x assume "x \<in> space ?P" 

323 
hence in_MX: "{fst x} \<in> sets MX" "{snd x} \<in> sets MY" using X.sets_eq_Pow Y.sets_eq_Pow 

324 
by (auto simp: prod_measure_space_def) 

325 
assume "?\<mu> {x} = 0" 

326 
with X.finite_prod_measure_times[OF fms(2), of "{fst x}" "{snd x}"] in_MX 

327 
have "distribution X {fst x} = 0 \<or> distribution Y {snd x} = 0" 

328 
by (simp add: prod_measure_space_def) 

329 
hence "joint_distribution X Y {x} = 0" 

330 
by (cases x) (auto simp: distribution_order) } 

331 
note measure_0 = this 

332 
show ?thesis 

333 
unfolding Let_def mutual_information_def 

334 
using measure_0 fms_P fms_P' MX MY P.absolutely_continuous_def 

335 
by (subst P.KL_divergence_eq_finite) 

336 
(auto simp add: prod_measure_space_def prod_measure_times_finite 

337 
finite_prob_space_eq setsum_cartesian_product' real_of_pinfreal_mult[symmetric]) 

338 
qed 

339 

36624  340 
lemma (in finite_information_space) 
38656  341 
assumes MX: "finite_prob_space MX (distribution X)" 
342 
assumes MY: "finite_prob_space MY (distribution Y)" 

36624  343 
and X_space: "X ` space M \<subseteq> space MX" and Y_space: "Y ` space M \<subseteq> space MY" 
344 
shows mutual_information_eq_generic: 

345 
"mutual_information b MX MY X Y = (\<Sum> (x,y) \<in> space MX \<times> space MY. 

38656  346 
real (joint_distribution X Y {(x,y)}) * 
347 
log b (real (joint_distribution X Y {(x,y)}) / 

348 
(real (distribution X {x}) * real (distribution Y {y}))))" 

36624  349 
(is "?equality") 
350 
and mutual_information_positive_generic: 

351 
"0 \<le> mutual_information b MX MY X Y" (is "?positive") 

352 
proof  

38656  353 
let ?P = "prod_measure_space MX MY" 
354 
let ?\<mu> = "prod_measure MX (distribution X) MY (distribution Y)" 

355 
let ?\<nu> = "joint_distribution X Y" 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

356 

38656  357 
interpret X: finite_prob_space MX "distribution X" by fact 
358 
moreover interpret Y: finite_prob_space MY "distribution Y" by fact 

359 
have ms_X: "measure_space MX (distribution X)" 

360 
and ms_Y: "measure_space MY (distribution Y)" 

361 
and fms: "finite_measure_space MX (distribution X)" "finite_measure_space MY (distribution Y)" by fact+ 

362 
have fms_P: "finite_measure_space ?P ?\<mu>" 

363 
by (rule X.finite_measure_space_finite_prod_measure) fact 

364 
then interpret P: finite_measure_space ?P ?\<mu> . 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

365 

38656  366 
have fms_P': "finite_measure_space ?P ?\<nu>" 
36624  367 
using finite_product_measure_space[of "space MX" "space MY"] 
368 
X.finite_space Y.finite_space sigma_prod_sets_finite[OF X.finite_space Y.finite_space] 

369 
X.sets_eq_Pow Y.sets_eq_Pow 

38656  370 
by (simp add: prod_measure_space_def sigma_def) 
371 
then interpret P': finite_measure_space ?P ?\<nu> . 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

372 

36624  373 
{ fix x assume "x \<in> space ?P" 
38656  374 
hence in_MX: "{fst x} \<in> sets MX" "{snd x} \<in> sets MY" using X.sets_eq_Pow Y.sets_eq_Pow 
36624  375 
by (auto simp: prod_measure_space_def) 
376 

38656  377 
assume "?\<mu> {x} = 0" 
378 
with X.finite_prod_measure_times[OF fms(2), of "{fst x}" "{snd x}"] in_MX 

36624  379 
have "distribution X {fst x} = 0 \<or> distribution Y {snd x} = 0" 
380 
by (simp add: prod_measure_space_def) 

381 

382 
hence "joint_distribution X Y {x} = 0" 

383 
by (cases x) (auto simp: distribution_order) } 

384 
note measure_0 = this 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

385 

36624  386 
show ?equality 
38656  387 
unfolding Let_def mutual_information_def 
388 
using measure_0 fms_P fms_P' MX MY P.absolutely_continuous_def 

389 
by (subst P.KL_divergence_eq_finite) 

390 
(auto simp add: prod_measure_space_def prod_measure_times_finite 

391 
finite_prob_space_eq setsum_cartesian_product' real_of_pinfreal_mult[symmetric]) 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

392 

36624  393 
show ?positive 
394 
unfolding Let_def mutual_information_def using measure_0 b_gt_1 

38656  395 
proof (safe intro!: finite_prob_space.KL_divergence_positive_finite, simp_all) 
396 
have "?\<mu> (space ?P) = 1" 

397 
using X.top Y.top X.measure_space_1 Y.measure_space_1 fms 

398 
by (simp add: prod_measure_space_def X.finite_prod_measure_times) 

399 
with fms_P show "finite_prob_space ?P ?\<mu>" 

36624  400 
by (simp add: finite_prob_space_eq) 
401 

38656  402 
from ms_X ms_Y X.top Y.top X.measure_space_1 Y.measure_space_1 Y.not_empty X_space Y_space 
403 
have "?\<nu> (space ?P) = 1" unfolding measure_space_1[symmetric] 

404 
by (auto intro!: arg_cong[where f="\<mu>"] 

405 
simp add: prod_measure_space_def distribution_def vimage_Times comp_def) 

406 
with fms_P' show "finite_prob_space ?P ?\<nu>" 

36624  407 
by (simp add: finite_prob_space_eq) 
36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

408 
qed 
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

409 
qed 
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

410 

36624  411 
lemma (in finite_information_space) mutual_information_eq: 
412 
"\<I>(X;Y) = (\<Sum> (x,y) \<in> X ` space M \<times> Y ` space M. 

38656  413 
real (distribution (\<lambda>x. (X x, Y x)) {(x,y)}) * log b (real (distribution (\<lambda>x. (X x, Y x)) {(x,y)}) / 
414 
(real (distribution X {x}) * real (distribution Y {y}))))" 

36624  415 
by (subst mutual_information_eq_generic) (simp_all add: finite_prob_space_of_images) 
36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

416 

39097  417 
lemma (in finite_information_space) mutual_information_cong: 
418 
assumes X: "\<And>x. x \<in> space M \<Longrightarrow> X x = X' x" 

419 
assumes Y: "\<And>x. x \<in> space M \<Longrightarrow> Y x = Y' x" 

420 
shows "\<I>(X ; Y) = \<I>(X' ; Y')" 

421 
proof  

422 
have "X ` space M = X' ` space M" using X by (auto intro!: image_eqI) 

423 
moreover have "Y ` space M = Y' ` space M" using Y by (auto intro!: image_eqI) 

424 
ultimately show ?thesis 

425 
unfolding mutual_information_eq 

426 
using 

427 
assms[THEN distribution_cong] 

428 
joint_distribution_cong[OF assms] 

429 
by (auto intro!: setsum_cong) 

430 
qed 

431 

36624  432 
lemma (in finite_information_space) mutual_information_positive: "0 \<le> \<I>(X;Y)" 
433 
by (subst mutual_information_positive_generic) (simp_all add: finite_prob_space_of_images) 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

434 

39097  435 
subsection {* Entropy *} 
436 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

437 
definition (in prob_space) 
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

438 
"entropy b s X = mutual_information b s s X X" 
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

439 

36624  440 
abbreviation (in finite_information_space) 
441 
finite_entropy ("\<H>'(_')") where 

442 
"\<H>(X) \<equiv> entropy b \<lparr> space = X`space M, sets = Pow (X`space M) \<rparr> X" 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

443 

39097  444 
lemma (in finite_information_space) entropy_generic_eq: 
445 
assumes MX: "finite_measure_space MX (distribution X)" 

446 
shows "entropy b MX X = (\<Sum> x \<in> space MX. real (distribution X {x}) * log b (real (distribution X {x})))" 

447 
proof  

448 
let "?X x" = "real (distribution X {x})" 

449 
let "?XX x y" = "real (joint_distribution X X {(x, y)})" 

450 
interpret MX: finite_measure_space MX "distribution X" by fact 

451 
{ fix x y 

452 
have "(\<lambda>x. (X x, X x)) ` {(x, y)} = (if x = y then X ` {x} else {})" by auto 

453 
then have "?XX x y * log b (?XX x y / (?X x * ?X y)) = 

454 
(if x = y then  ?X y * log b (?X y) else 0)" 

455 
unfolding distribution_def by (auto simp: mult_log_divide) } 

456 
note remove_XX = this 

457 
show ?thesis 

458 
unfolding entropy_def mutual_information_generic_eq[OF MX MX] 

459 
unfolding setsum_cartesian_product[symmetric] setsum_negf[symmetric] remove_XX 

460 
by (auto simp: setsum_cases MX.finite_space) 

461 
qed 

36624  462 

463 
lemma (in finite_information_space) entropy_eq: 

38656  464 
"\<H>(X) = (\<Sum> x \<in> X ` space M. real (distribution X {x}) * log b (real (distribution X {x})))" 
39097  465 
by (simp add: finite_measure_space entropy_generic_eq) 
36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

466 

36624  467 
lemma (in finite_information_space) entropy_positive: "0 \<le> \<H>(X)" 
468 
unfolding entropy_def using mutual_information_positive . 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

469 

39097  470 
lemma (in finite_information_space) entropy_certainty_eq_0: 
471 
assumes "x \<in> X ` space M" and "distribution X {x} = 1" 

472 
shows "\<H>(X) = 0" 

473 
proof  

474 
interpret X: finite_prob_space "\<lparr> space = X ` space M, sets = Pow (X ` space M) \<rparr>" "distribution X" 

475 
by (rule finite_prob_space_of_images) 

476 

477 
have "distribution X (X ` space M  {x}) = distribution X (X ` space M)  distribution X {x}" 

478 
using X.measure_compl[of "{x}"] assms by auto 

479 
also have "\<dots> = 0" using X.prob_space assms by auto 

480 
finally have X0: "distribution X (X ` space M  {x}) = 0" by auto 

481 

482 
{ fix y assume asm: "y \<noteq> x" "y \<in> X ` space M" 

483 
hence "{y} \<subseteq> X ` space M  {x}" by auto 

484 
from X.measure_mono[OF this] X0 asm 

485 
have "distribution X {y} = 0" by auto } 

486 

487 
hence fi: "\<And> y. y \<in> X ` space M \<Longrightarrow> real (distribution X {y}) = (if x = y then 1 else 0)" 

488 
using assms by auto 

489 

490 
have y: "\<And>y. (if x = y then 1 else 0) * log b (if x = y then 1 else 0) = 0" by simp 

491 

492 
show ?thesis unfolding entropy_eq by (auto simp: y fi) 

493 
qed 

494 

495 
lemma (in finite_information_space) entropy_le_card_not_0: 

496 
"\<H>(X) \<le> log b (real (card (X ` space M \<inter> {x . distribution X {x} \<noteq> 0})))" 

497 
proof  

498 
let "?d x" = "distribution X {x}" 

499 
let "?p x" = "real (?d x)" 

500 
have "\<H>(X) = (\<Sum>x\<in>X`space M. ?p x * log b (1 / ?p x))" 

501 
by (auto intro!: setsum_cong simp: entropy_eq setsum_negf[symmetric]) 

502 
also have "\<dots> \<le> log b (\<Sum>x\<in>X`space M. ?p x * (1 / ?p x))" 

503 
apply (rule log_setsum') 

504 
using not_empty b_gt_1 finite_space sum_over_space_real_distribution 

505 
by auto 

506 
also have "\<dots> = log b (\<Sum>x\<in>X`space M. if ?d x \<noteq> 0 then 1 else 0)" 

507 
apply (rule arg_cong[where f="\<lambda>f. log b (\<Sum>x\<in>X`space M. f x)"]) 

508 
using distribution_finite[of X] by (auto simp: expand_fun_eq real_of_pinfreal_eq_0) 

509 
finally show ?thesis 

510 
using finite_space by (auto simp: setsum_cases real_eq_of_nat) 

511 
qed 

512 

513 
lemma (in finite_information_space) entropy_uniform_max: 

514 
assumes "\<And>x y. \<lbrakk> x \<in> X ` space M ; y \<in> X ` space M \<rbrakk> \<Longrightarrow> distribution X {x} = distribution X {y}" 

515 
shows "\<H>(X) = log b (real (card (X ` space M)))" 

516 
proof  

517 
note uniform = 

518 
finite_prob_space_of_images[of X, THEN finite_prob_space.uniform_prob, simplified] 

519 

520 
have card_gt0: "0 < card (X ` space M)" unfolding card_gt_0_iff 

521 
using finite_space not_empty by auto 

522 

523 
{ fix x assume "x \<in> X ` space M" 

524 
hence "real (distribution X {x}) = 1 / real (card (X ` space M))" 

525 
proof (rule uniform) 

526 
fix x y assume "x \<in> X`space M" "y \<in> X`space M" 

527 
from assms[OF this] show "real (distribution X {x}) = real (distribution X {y})" by simp 

528 
qed } 

529 
thus ?thesis 

530 
using not_empty finite_space b_gt_1 card_gt0 

531 
by (simp add: entropy_eq real_eq_of_nat[symmetric] log_divide) 

532 
qed 

533 

534 
lemma (in finite_information_space) entropy_le_card: 

535 
"\<H>(X) \<le> log b (real (card (X ` space M)))" 

536 
proof cases 

537 
assume "X ` space M \<inter> {x. distribution X {x} \<noteq> 0} = {}" 

538 
then have "\<And>x. x\<in>X`space M \<Longrightarrow> distribution X {x} = 0" by auto 

539 
moreover 

540 
have "0 < card (X`space M)" 

541 
using finite_space not_empty unfolding card_gt_0_iff by auto 

542 
then have "log b 1 \<le> log b (real (card (X`space M)))" 

543 
using b_gt_1 by (intro log_le) auto 

544 
ultimately show ?thesis unfolding entropy_eq by simp 

545 
next 

546 
assume False: "X ` space M \<inter> {x. distribution X {x} \<noteq> 0} \<noteq> {}" 

547 
have "card (X ` space M \<inter> {x. distribution X {x} \<noteq> 0}) \<le> card (X ` space M)" 

548 
(is "?A \<le> ?B") using finite_space not_empty by (auto intro!: card_mono) 

549 
note entropy_le_card_not_0 

550 
also have "log b (real ?A) \<le> log b (real ?B)" 

551 
using b_gt_1 False finite_space not_empty `?A \<le> ?B` 

552 
by (auto intro!: log_le simp: card_gt_0_iff) 

553 
finally show ?thesis . 

554 
qed 

555 

556 
lemma (in finite_information_space) entropy_commute: 

557 
"\<H>(\<lambda>x. (X x, Y x)) = \<H>(\<lambda>x. (Y x, X x))" 

558 
proof  

559 
have *: "(\<lambda>x. (Y x, X x))`space M = (\<lambda>(a,b). (b,a))`(\<lambda>x. (X x, Y x))`space M" 

560 
by auto 

561 
have inj: "\<And>X. inj_on (\<lambda>(a,b). (b,a)) X" 

562 
by (auto intro!: inj_onI) 

563 
show ?thesis 

564 
unfolding entropy_eq unfolding * setsum_reindex[OF inj] 

565 
by (simp add: joint_distribution_commute[of Y X] split_beta) 

566 
qed 

567 

568 
lemma (in finite_information_space) entropy_eq_cartesian_sum: 

569 
"\<H>(\<lambda>x. (X x, Y x)) = (\<Sum>x\<in>X`space M. \<Sum>y\<in>Y`space M. 

570 
real (joint_distribution X Y {(x,y)}) * 

571 
log b (real (joint_distribution X Y {(x,y)})))" 

572 
proof  

573 
{ fix x assume "x\<notin>(\<lambda>x. (X x, Y x))`space M" 

574 
then have "(\<lambda>x. (X x, Y x)) ` {x} \<inter> space M = {}" by auto 

575 
then have "joint_distribution X Y {x} = 0" 

576 
unfolding distribution_def by auto } 

577 
then show ?thesis using finite_space 

578 
unfolding entropy_eq neg_equal_iff_equal setsum_cartesian_product 

579 
by (auto intro!: setsum_mono_zero_cong_left) 

580 
qed 

581 

582 
subsection {* Conditional Mutual Information *} 

583 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

584 
definition (in prob_space) 
38656  585 
"conditional_mutual_information b M1 M2 M3 X Y Z \<equiv> 
586 
mutual_information b M1 (prod_measure_space M2 M3) X (\<lambda>x. (Y x, Z x))  

587 
mutual_information b M1 M3 X Z" 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

588 

36624  589 
abbreviation (in finite_information_space) 
590 
finite_conditional_mutual_information ("\<I>'( _ ; _  _ ')") where 

591 
"\<I>(X ; Y  Z) \<equiv> conditional_mutual_information b 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

592 
\<lparr> space = X`space M, sets = Pow (X`space M) \<rparr> 
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

593 
\<lparr> space = Y`space M, sets = Pow (Y`space M) \<rparr> 
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

594 
\<lparr> space = Z`space M, sets = Pow (Z`space M) \<rparr> 
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

595 
X Y Z" 
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

596 

39097  597 
lemma (in finite_information_space) conditional_mutual_information_generic_eq: 
598 
assumes MX: "finite_measure_space MX (distribution X)" 

599 
assumes MY: "finite_measure_space MY (distribution Y)" 

600 
assumes MZ: "finite_measure_space MZ (distribution Z)" 

601 
shows "conditional_mutual_information b MX MY MZ X Y Z = 

602 
(\<Sum>(x, y, z)\<in>space MX \<times> space MY \<times> space MZ. 

603 
real (joint_distribution X (\<lambda>x. (Y x, Z x)) {(x, y, z)}) * 

604 
log b (real (joint_distribution X (\<lambda>x. (Y x, Z x)) {(x, y, z)}) / 

605 
(real (distribution X {x}) * real (joint_distribution Y Z {(y, z)}))))  

606 
(\<Sum>(x, y)\<in>space MX \<times> space MZ. 

607 
real (joint_distribution X Z {(x, y)}) * 

608 
log b (real (joint_distribution X Z {(x, y)}) / (real (distribution X {x}) * real (distribution Z {y}))))" 

609 
using assms finite_measure_space_prod[OF MY MZ] 

610 
unfolding conditional_mutual_information_def 

611 
by (subst (1 2) mutual_information_generic_eq) 

612 
(simp_all add: setsum_cartesian_product' finite_measure_space.finite_prod_measure_space) 

38656  613 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

614 

36624  615 
lemma (in finite_information_space) conditional_mutual_information_eq: 
616 
"\<I>(X ; Y  Z) = (\<Sum>(x, y, z) \<in> X ` space M \<times> Y ` space M \<times> Z ` space M. 

38656  617 
real (distribution (\<lambda>x. (X x, Y x, Z x)) {(x, y, z)}) * 
618 
log b (real (distribution (\<lambda>x. (X x, Y x, Z x)) {(x, y, z)}) / 

619 
(real (joint_distribution X Z {(x, z)}) * real (joint_distribution Y Z {(y,z)} / distribution Z {z}))))" 

39097  620 
by (subst conditional_mutual_information_generic_eq) 
38656  621 
(auto simp add: prod_measure_space_def sigma_prod_sets_finite finite_space 
39097  622 
finite_measure_space finite_product_prob_space_of_images sigma_def 
36624  623 
setsum_cartesian_product' setsum_product setsum_subtractf setsum_addf 
38656  624 
setsum_left_distrib[symmetric] setsum_real_distribution setsum_commute[where A="Y`space M"] 
625 
real_of_pinfreal_mult[symmetric] 

36624  626 
cong: setsum_cong) 
627 

628 
lemma (in finite_information_space) conditional_mutual_information_eq_mutual_information: 

629 
"\<I>(X ; Y) = \<I>(X ; Y  (\<lambda>x. ()))" 

630 
proof  

631 
have [simp]: "(\<lambda>x. ()) ` space M = {()}" using not_empty by auto 

632 

633 
show ?thesis 

634 
unfolding conditional_mutual_information_eq mutual_information_eq 

635 
by (simp add: setsum_cartesian_product' distribution_remove_const) 

636 
qed 

637 

638 
lemma (in finite_information_space) conditional_mutual_information_positive: 

639 
"0 \<le> \<I>(X ; Y  Z)" 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

640 
proof  
38656  641 
let "?dXYZ A" = "real (distribution (\<lambda>x. (X x, Y x, Z x)) A)" 
642 
let "?dXZ A" = "real (joint_distribution X Z A)" 

643 
let "?dYZ A" = "real (joint_distribution Y Z A)" 

644 
let "?dX A" = "real (distribution X A)" 

645 
let "?dZ A" = "real (distribution Z A)" 

36624  646 
let ?M = "X ` space M \<times> Y ` space M \<times> Z ` space M" 
647 

648 
have split_beta: "\<And>f. split f = (\<lambda>x. f (fst x) (snd x))" by (simp add: expand_fun_eq) 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

649 

36624  650 
have " (\<Sum>(x, y, z) \<in> ?M. ?dXYZ {(x, y, z)} * 
651 
log b (?dXYZ {(x, y, z)} / (?dXZ {(x, z)} * ?dYZ {(y,z)} / ?dZ {z}))) 

652 
\<le> log b (\<Sum>(x, y, z) \<in> ?M. ?dXZ {(x, z)} * ?dYZ {(y,z)} / ?dZ {z})" 

653 
unfolding split_beta 

654 
proof (rule log_setsum_divide) 

655 
show "?M \<noteq> {}" using not_empty by simp 

656 
show "1 < b" using b_gt_1 . 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

657 

36624  658 
fix x assume "x \<in> ?M" 
38656  659 
let ?x = "(fst x, fst (snd x), snd (snd x))" 
660 

661 
show "0 \<le> ?dXYZ {?x}" using real_pinfreal_nonneg . 

36624  662 
show "0 \<le> ?dXZ {(fst x, snd (snd x))} * ?dYZ {(fst (snd x), snd (snd x))} / ?dZ {snd (snd x)}" 
38656  663 
by (simp add: real_pinfreal_nonneg mult_nonneg_nonneg divide_nonneg_nonneg) 
36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

664 

38656  665 
assume *: "0 < ?dXYZ {?x}" 
36624  666 
thus "0 < ?dXZ {(fst x, snd (snd x))} * ?dYZ {(fst (snd x), snd (snd x))} / ?dZ {snd (snd x)}" 
38656  667 
apply (rule_tac divide_pos_pos mult_pos_pos)+ 
668 
by (auto simp add: real_distribution_gt_0 intro: distribution_order(6) distribution_mono_gt_0) 

669 
qed (simp_all add: setsum_cartesian_product' sum_over_space_real_distribution setsum_real_distribution finite_space) 

36624  670 
also have "(\<Sum>(x, y, z) \<in> ?M. ?dXZ {(x, z)} * ?dYZ {(y,z)} / ?dZ {z}) = (\<Sum>z\<in>Z`space M. ?dZ {z})" 
671 
apply (simp add: setsum_cartesian_product') 

672 
apply (subst setsum_commute) 

673 
apply (subst (2) setsum_commute) 

38656  674 
by (auto simp: setsum_divide_distrib[symmetric] setsum_product[symmetric] setsum_real_distribution 
36624  675 
intro!: setsum_cong) 
676 
finally show ?thesis 

38656  677 
unfolding conditional_mutual_information_eq sum_over_space_real_distribution 
678 
by (simp add: real_of_pinfreal_mult[symmetric]) 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

679 
qed 
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

680 

39097  681 
subsection {* Conditional Entropy *} 
682 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

683 
definition (in prob_space) 
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

684 
"conditional_entropy b S T X Y = conditional_mutual_information b S S T X X Y" 
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

685 

36624  686 
abbreviation (in finite_information_space) 
687 
finite_conditional_entropy ("\<H>'(_  _')") where 

688 
"\<H>(X  Y) \<equiv> conditional_entropy b 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

689 
\<lparr> space = X`space M, sets = Pow (X`space M) \<rparr> 
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

690 
\<lparr> space = Y`space M, sets = Pow (Y`space M) \<rparr> X Y" 
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

691 

36624  692 
lemma (in finite_information_space) conditional_entropy_positive: 
693 
"0 \<le> \<H>(X  Y)" unfolding conditional_entropy_def using conditional_mutual_information_positive . 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

694 

39097  695 
lemma (in finite_information_space) conditional_entropy_generic_eq: 
696 
assumes MX: "finite_measure_space MX (distribution X)" 

697 
assumes MY: "finite_measure_space MZ (distribution Z)" 

698 
shows "conditional_entropy b MX MZ X Z = 

699 
 (\<Sum>(x, z)\<in>space MX \<times> space MZ. 

700 
real (joint_distribution X Z {(x, z)}) * 

701 
log b (real (joint_distribution X Z {(x, z)}) / real (distribution Z {z})))" 

702 
unfolding conditional_entropy_def using assms 

703 
apply (simp add: conditional_mutual_information_generic_eq 

704 
setsum_cartesian_product' setsum_commute[of _ "space MZ"] 

705 
setsum_negf[symmetric] setsum_subtractf[symmetric]) 

706 
proof (safe intro!: setsum_cong, simp) 

707 
fix z x assume "z \<in> space MZ" "x \<in> space MX" 

708 
let "?XXZ x'" = "real (joint_distribution X (\<lambda>x. (X x, Z x)) {(x, x', z)})" 

709 
let "?XZ x'" = "real (joint_distribution X Z {(x', z)})" 

710 
let "?X" = "real (distribution X {x})" 

711 
interpret MX: finite_measure_space MX "distribution X" by fact 

712 
have *: "\<And>A. A = {} \<Longrightarrow> prob A = 0" by simp 

713 
have XXZ: "\<And>x'. ?XXZ x' = (if x' = x then ?XZ x else 0)" 

714 
by (auto simp: distribution_def intro!: arg_cong[where f=prob] *) 

715 
have "(\<Sum>x'\<in>space MX. ?XXZ x' * log b (?XXZ x')  (?XXZ x' * log b ?X + ?XXZ x' * log b (?XZ x'))) = 

716 
(\<Sum>x'\<in>{x}. ?XZ x' * log b (?XZ x')  (?XZ x' * log b ?X + ?XZ x' * log b (?XZ x')))" 

717 
using `x \<in> space MX` MX.finite_space 

718 
by (safe intro!: setsum_mono_zero_cong_right) 

719 
(auto split: split_if_asm simp: XXZ) 

720 
then show "(\<Sum>x'\<in>space MX. ?XXZ x' * log b (?XXZ x')  (?XXZ x' * log b ?X + ?XXZ x' * log b (?XZ x'))) + 

721 
?XZ x * log b ?X = 0" by simp 

722 
qed 

723 

36624  724 
lemma (in finite_information_space) conditional_entropy_eq: 
725 
"\<H>(X  Z) = 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

726 
 (\<Sum>(x, z)\<in>X ` space M \<times> Z ` space M. 
38656  727 
real (joint_distribution X Z {(x, z)}) * 
728 
log b (real (joint_distribution X Z {(x, z)}) / real (distribution Z {z})))" 

39097  729 
by (simp add: finite_measure_space conditional_entropy_generic_eq) 
730 

731 
lemma (in finite_information_space) conditional_entropy_eq_ce_with_hypothesis: 

732 
"\<H>(X  Y) = 

733 
(\<Sum>y\<in>Y`space M. real (distribution Y {y}) * 

734 
(\<Sum>x\<in>X`space M. real (joint_distribution X Y {(x,y)}) / real (distribution Y {(y)}) * 

735 
log b (real (joint_distribution X Y {(x,y)}) / real (distribution Y {(y)}))))" 

736 
unfolding conditional_entropy_eq neg_equal_iff_equal 

737 
apply (simp add: setsum_commute[of _ "Y`space M"] setsum_cartesian_product' setsum_divide_distrib[symmetric]) 

738 
apply (safe intro!: setsum_cong) 

739 
using real_distribution_order'[of Y _ X _] 

740 
by (auto simp add: setsum_subtractf[of _ _ "X`space M"]) 

741 

742 
lemma (in finite_information_space) conditional_entropy_eq_cartesian_sum: 

743 
"\<H>(X  Y) = (\<Sum>x\<in>X`space M. \<Sum>y\<in>Y`space M. 

744 
real (joint_distribution X Y {(x,y)}) * 

745 
log b (real (joint_distribution X Y {(x,y)}) / real (distribution Y {y})))" 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

746 
proof  
39097  747 
{ fix x assume "x\<notin>(\<lambda>x. (X x, Y x))`space M" 
748 
then have "(\<lambda>x. (X x, Y x)) ` {x} \<inter> space M = {}" by auto 

749 
then have "joint_distribution X Y {x} = 0" 

750 
unfolding distribution_def by auto } 

751 
then show ?thesis using finite_space 

752 
unfolding conditional_entropy_eq neg_equal_iff_equal setsum_cartesian_product 

753 
by (auto intro!: setsum_mono_zero_cong_left) 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

754 
qed 
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

755 

39097  756 
subsection {* Equalities *} 
757 

36624  758 
lemma (in finite_information_space) mutual_information_eq_entropy_conditional_entropy: 
759 
"\<I>(X ; Z) = \<H>(X)  \<H>(X  Z)" 

760 
unfolding mutual_information_eq entropy_eq conditional_entropy_eq 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

761 
using finite_space 
36624  762 
by (auto simp add: setsum_addf setsum_subtractf setsum_cartesian_product' 
38656  763 
setsum_left_distrib[symmetric] setsum_addf setsum_real_distribution) 
36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

764 

36624  765 
lemma (in finite_information_space) conditional_entropy_less_eq_entropy: 
766 
"\<H>(X  Z) \<le> \<H>(X)" 

767 
proof  

768 
have "\<I>(X ; Z) = \<H>(X)  \<H>(X  Z)" using mutual_information_eq_entropy_conditional_entropy . 

769 
with mutual_information_positive[of X Z] entropy_positive[of X] 

770 
show ?thesis by auto 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

771 
qed 
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

772 

39097  773 
lemma (in finite_information_space) entropy_chain_rule: 
774 
"\<H>(\<lambda>x. (X x, Y x)) = \<H>(X) + \<H>(YX)" 

775 
unfolding entropy_eq[of X] entropy_eq_cartesian_sum conditional_entropy_eq_cartesian_sum 

776 
unfolding setsum_commute[of _ "X`space M"] setsum_negf[symmetric] setsum_addf[symmetric] 

777 
by (rule setsum_cong) 

778 
(simp_all add: setsum_negf setsum_addf setsum_subtractf setsum_real_distribution 

779 
setsum_left_distrib[symmetric] joint_distribution_commute[of X Y]) 

38656  780 

39097  781 
section {* Partitioning *} 
36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

782 

36624  783 
definition "subvimage A f g \<longleftrightarrow> (\<forall>x \<in> A. f ` {f x} \<inter> A \<subseteq> g ` {g x} \<inter> A)" 
784 

785 
lemma subvimageI: 

786 
assumes "\<And>x y. \<lbrakk> x \<in> A ; y \<in> A ; f x = f y \<rbrakk> \<Longrightarrow> g x = g y" 

787 
shows "subvimage A f g" 

788 
using assms unfolding subvimage_def by blast 

789 

790 
lemma subvimageE[consumes 1]: 

791 
assumes "subvimage A f g" 

792 
obtains "\<And>x y. \<lbrakk> x \<in> A ; y \<in> A ; f x = f y \<rbrakk> \<Longrightarrow> g x = g y" 

793 
using assms unfolding subvimage_def by blast 

794 

795 
lemma subvimageD: 

796 
"\<lbrakk> subvimage A f g ; x \<in> A ; y \<in> A ; f x = f y \<rbrakk> \<Longrightarrow> g x = g y" 

797 
using assms unfolding subvimage_def by blast 

798 

799 
lemma subvimage_subset: 

800 
"\<lbrakk> subvimage B f g ; A \<subseteq> B \<rbrakk> \<Longrightarrow> subvimage A f g" 

801 
unfolding subvimage_def by auto 

802 

803 
lemma subvimage_idem[intro]: "subvimage A g g" 

804 
by (safe intro!: subvimageI) 

805 

806 
lemma subvimage_comp_finer[intro]: 

807 
assumes svi: "subvimage A g h" 

808 
shows "subvimage A g (f \<circ> h)" 

809 
proof (rule subvimageI, simp) 

810 
fix x y assume "x \<in> A" "y \<in> A" "g x = g y" 

811 
from svi[THEN subvimageD, OF this] 

812 
show "f (h x) = f (h y)" by simp 

813 
qed 

814 

815 
lemma subvimage_comp_gran: 

816 
assumes svi: "subvimage A g h" 

817 
assumes inj: "inj_on f (g ` A)" 

818 
shows "subvimage A (f \<circ> g) h" 

819 
by (rule subvimageI) (auto intro!: subvimageD[OF svi] simp: inj_on_iff[OF inj]) 

820 

821 
lemma subvimage_comp: 

822 
assumes svi: "subvimage (f ` A) g h" 

823 
shows "subvimage A (g \<circ> f) (h \<circ> f)" 

824 
by (rule subvimageI) (auto intro!: svi[THEN subvimageD]) 

825 

826 
lemma subvimage_trans: 

827 
assumes fg: "subvimage A f g" 

828 
assumes gh: "subvimage A g h" 

829 
shows "subvimage A f h" 

830 
by (rule subvimageI) (auto intro!: fg[THEN subvimageD] gh[THEN subvimageD]) 

831 

832 
lemma subvimage_translator: 

833 
assumes svi: "subvimage A f g" 

834 
shows "\<exists>h. \<forall>x \<in> A. h (f x) = g x" 

835 
proof (safe intro!: exI[of _ "\<lambda>x. (THE z. z \<in> (g ` (f ` {x} \<inter> A)))"]) 

836 
fix x assume "x \<in> A" 

837 
show "(THE x'. x' \<in> (g ` (f ` {f x} \<inter> A))) = g x" 

838 
by (rule theI2[of _ "g x"]) 

839 
(insert `x \<in> A`, auto intro!: svi[THEN subvimageD]) 

840 
qed 

841 

842 
lemma subvimage_translator_image: 

843 
assumes svi: "subvimage A f g" 

844 
shows "\<exists>h. h ` f ` A = g ` A" 

845 
proof  

846 
from subvimage_translator[OF svi] 

847 
obtain h where "\<And>x. x \<in> A \<Longrightarrow> h (f x) = g x" by auto 

848 
thus ?thesis 

849 
by (auto intro!: exI[of _ h] 

850 
simp: image_compose[symmetric] comp_def cong: image_cong) 

851 
qed 

852 

853 
lemma subvimage_finite: 

854 
assumes svi: "subvimage A f g" and fin: "finite (f`A)" 

855 
shows "finite (g`A)" 

856 
proof  

857 
from subvimage_translator_image[OF svi] 

858 
obtain h where "g`A = h`f`A" by fastsimp 

859 
with fin show "finite (g`A)" by simp 

860 
qed 

861 

862 
lemma subvimage_disj: 

863 
assumes svi: "subvimage A f g" 

864 
shows "f ` {x} \<inter> A \<subseteq> g ` {y} \<inter> A \<or> 

865 
f ` {x} \<inter> g ` {y} \<inter> A = {}" (is "?sub \<or> ?dist") 

866 
proof (rule disjCI) 

867 
assume "\<not> ?dist" 

868 
then obtain z where "z \<in> A" and "x = f z" and "y = g z" by auto 

869 
thus "?sub" using svi unfolding subvimage_def by auto 

870 
qed 

871 

872 
lemma setsum_image_split: 

873 
assumes svi: "subvimage A f g" and fin: "finite (f ` A)" 

874 
shows "(\<Sum>x\<in>f`A. h x) = (\<Sum>y\<in>g`A. \<Sum>x\<in>f`(g ` {y} \<inter> A). h x)" 

875 
(is "?lhs = ?rhs") 

876 
proof  

877 
have "f ` A = 

878 
snd ` (SIGMA x : g ` A. f ` (g ` {x} \<inter> A))" 

879 
(is "_ = snd ` ?SIGMA") 

880 
unfolding image_split_eq_Sigma[symmetric] 

881 
by (simp add: image_compose[symmetric] comp_def) 

882 
moreover 

883 
have snd_inj: "inj_on snd ?SIGMA" 

884 
unfolding image_split_eq_Sigma[symmetric] 

885 
by (auto intro!: inj_onI subvimageD[OF svi]) 

886 
ultimately 

887 
have "(\<Sum>x\<in>f`A. h x) = (\<Sum>(x,y)\<in>?SIGMA. h y)" 

888 
by (auto simp: setsum_reindex intro: setsum_cong) 

889 
also have "... = ?rhs" 

890 
using subvimage_finite[OF svi fin] fin 

891 
apply (subst setsum_Sigma[symmetric]) 

892 
by (auto intro!: finite_subset[of _ "f`A"]) 

893 
finally show ?thesis . 

894 
qed 

895 

896 
lemma (in finite_information_space) entropy_partition: 

897 
assumes svi: "subvimage (space M) X P" 

898 
shows "\<H>(X) = \<H>(P) + \<H>(XP)" 

899 
proof  

38656  900 
have "(\<Sum>x\<in>X ` space M. real (distribution X {x}) * log b (real (distribution X {x}))) = 
36624  901 
(\<Sum>y\<in>P `space M. \<Sum>x\<in>X ` space M. 
38656  902 
real (joint_distribution X P {(x, y)}) * log b (real (joint_distribution X P {(x, y)})))" 
36624  903 
proof (subst setsum_image_split[OF svi], 
904 
safe intro!: finite_imageI finite_space setsum_mono_zero_cong_left imageI) 

905 
fix p x assume in_space: "p \<in> space M" "x \<in> space M" 

38656  906 
assume "real (joint_distribution X P {(X x, P p)}) * log b (real (joint_distribution X P {(X x, P p)})) \<noteq> 0" 
36624  907 
hence "(\<lambda>x. (X x, P x)) ` {(X x, P p)} \<inter> space M \<noteq> {}" by (auto simp: distribution_def) 
908 
with svi[unfolded subvimage_def, rule_format, OF `x \<in> space M`] 

909 
show "x \<in> P ` {P p}" by auto 

910 
next 

911 
fix p x assume in_space: "p \<in> space M" "x \<in> space M" 

912 
assume "P x = P p" 

913 
from this[symmetric] svi[unfolded subvimage_def, rule_format, OF `x \<in> space M`] 

914 
have "X ` {X x} \<inter> space M \<subseteq> P ` {P p} \<inter> space M" 

915 
by auto 

916 
hence "(\<lambda>x. (X x, P x)) ` {(X x, P p)} \<inter> space M = X ` {X x} \<inter> space M" 

917 
by auto 

38656  918 
thus "real (distribution X {X x}) * log b (real (distribution X {X x})) = 
919 
real (joint_distribution X P {(X x, P p)}) * 

920 
log b (real (joint_distribution X P {(X x, P p)}))" 

36624  921 
by (auto simp: distribution_def) 
922 
qed 

923 
thus ?thesis 

924 
unfolding entropy_eq conditional_entropy_eq 

38656  925 
by (simp add: setsum_cartesian_product' setsum_subtractf setsum_real_distribution 
36624  926 
setsum_left_distrib[symmetric] setsum_commute[where B="P`space M"]) 
927 
qed 

928 

929 
corollary (in finite_information_space) entropy_data_processing: 

930 
"\<H>(f \<circ> X) \<le> \<H>(X)" 

931 
by (subst (2) entropy_partition[of _ "f \<circ> X"]) (auto intro: conditional_entropy_positive) 

932 

933 
corollary (in finite_information_space) entropy_of_inj: 

934 
assumes "inj_on f (X`space M)" 

935 
shows "\<H>(f \<circ> X) = \<H>(X)" 

936 
proof (rule antisym) 

937 
show "\<H>(f \<circ> X) \<le> \<H>(X)" using entropy_data_processing . 

938 
next 

939 
have "\<H>(X) = \<H>(the_inv_into (X`space M) f \<circ> (f \<circ> X))" 

940 
by (auto intro!: mutual_information_cong simp: entropy_def the_inv_into_f_f[OF assms]) 

941 
also have "... \<le> \<H>(f \<circ> X)" 

942 
using entropy_data_processing . 

943 
finally show "\<H>(X) \<le> \<H>(f \<circ> X)" . 

944 
qed 

945 

36080
0d9affa4e73c
Added Information theory and Example: dining cryptographers
hoelzl
parents:
diff
changeset

946 
end 