src/HOL/Probability/Information.thy
author hoelzl
Wed Dec 01 19:20:30 2010 +0100 (2010-12-01)
changeset 40859 de0b30e6c2d2
parent 39302 d7728f65b353
child 41023 9118eb4eb8dc
permissions -rw-r--r--
Support product spaces on sigma finite measures.
Introduce the almost everywhere quantifier.
Introduce 'morphism' theorems for most constants.
Prove uniqueness of measures on cut stable generators.
Prove uniqueness of the Radon-Nikodym derivative.
Removed misleading suffix from borel_space and lebesgue_space.
Use product spaces to introduce Fubini on the Lebesgue integral.
Combine Euclidean_Lebesgue and Lebesgue_Measure.
Generalize theorems about mutual information and entropy to arbitrary probability spaces.
Remove simproc mult_log as it does not work with interpretations.
Introduce completion of measure spaces.
Change type of sigma.
Introduce dynkin systems.
hoelzl@36080
     1
theory Information
hoelzl@40859
     2
imports Probability_Space Convex Lebesgue_Measure
hoelzl@36080
     3
begin
hoelzl@36080
     4
hoelzl@39097
     5
lemma log_le: "1 < a \<Longrightarrow> 0 < x \<Longrightarrow> x \<le> y \<Longrightarrow> log a x \<le> log a y"
hoelzl@39097
     6
  by (subst log_le_cancel_iff) auto
hoelzl@39097
     7
hoelzl@39097
     8
lemma log_less: "1 < a \<Longrightarrow> 0 < x \<Longrightarrow> x < y \<Longrightarrow> log a x < log a y"
hoelzl@39097
     9
  by (subst log_less_cancel_iff) auto
hoelzl@39097
    10
hoelzl@39097
    11
lemma setsum_cartesian_product':
hoelzl@39097
    12
  "(\<Sum>x\<in>A \<times> B. f x) = (\<Sum>x\<in>A. setsum (\<lambda>y. f (x, y)) B)"
hoelzl@39097
    13
  unfolding setsum_cartesian_product by simp
hoelzl@39097
    14
hoelzl@36624
    15
section "Convex theory"
hoelzl@36080
    16
hoelzl@36624
    17
lemma log_setsum:
hoelzl@36624
    18
  assumes "finite s" "s \<noteq> {}"
hoelzl@36624
    19
  assumes "b > 1"
hoelzl@36624
    20
  assumes "(\<Sum> i \<in> s. a i) = 1"
hoelzl@36624
    21
  assumes "\<And> i. i \<in> s \<Longrightarrow> a i \<ge> 0"
hoelzl@36624
    22
  assumes "\<And> i. i \<in> s \<Longrightarrow> y i \<in> {0 <..}"
hoelzl@36624
    23
  shows "log b (\<Sum> i \<in> s. a i * y i) \<ge> (\<Sum> i \<in> s. a i * log b (y i))"
hoelzl@36624
    24
proof -
hoelzl@36624
    25
  have "convex_on {0 <..} (\<lambda> x. - log b x)"
hoelzl@36624
    26
    by (rule minus_log_convex[OF `b > 1`])
hoelzl@36624
    27
  hence "- log b (\<Sum> i \<in> s. a i * y i) \<le> (\<Sum> i \<in> s. a i * - log b (y i))"
hoelzl@36624
    28
    using convex_on_setsum[of _ _ "\<lambda> x. - log b x"] assms pos_is_convex by fastsimp
hoelzl@36624
    29
  thus ?thesis by (auto simp add:setsum_negf le_imp_neg_le)
hoelzl@36624
    30
qed
hoelzl@36080
    31
hoelzl@36624
    32
lemma log_setsum':
hoelzl@36624
    33
  assumes "finite s" "s \<noteq> {}"
hoelzl@36624
    34
  assumes "b > 1"
hoelzl@36624
    35
  assumes "(\<Sum> i \<in> s. a i) = 1"
hoelzl@36624
    36
  assumes pos: "\<And> i. i \<in> s \<Longrightarrow> 0 \<le> a i"
hoelzl@36624
    37
          "\<And> i. \<lbrakk> i \<in> s ; 0 < a i \<rbrakk> \<Longrightarrow> 0 < y i"
hoelzl@36624
    38
  shows "log b (\<Sum> i \<in> s. a i * y i) \<ge> (\<Sum> i \<in> s. a i * log b (y i))"
hoelzl@36080
    39
proof -
hoelzl@36624
    40
  have "\<And>y. (\<Sum> i \<in> s - {i. a i = 0}. a i * y i) = (\<Sum> i \<in> s. a i * y i)"
hoelzl@36624
    41
    using assms by (auto intro!: setsum_mono_zero_cong_left)
hoelzl@36624
    42
  moreover have "log b (\<Sum> i \<in> s - {i. a i = 0}. a i * y i) \<ge> (\<Sum> i \<in> s - {i. a i = 0}. a i * log b (y i))"
hoelzl@36624
    43
  proof (rule log_setsum)
hoelzl@36624
    44
    have "setsum a (s - {i. a i = 0}) = setsum a s"
hoelzl@36624
    45
      using assms(1) by (rule setsum_mono_zero_cong_left) auto
hoelzl@36624
    46
    thus sum_1: "setsum a (s - {i. a i = 0}) = 1"
hoelzl@36624
    47
      "finite (s - {i. a i = 0})" using assms by simp_all
hoelzl@36624
    48
hoelzl@36624
    49
    show "s - {i. a i = 0} \<noteq> {}"
hoelzl@36624
    50
    proof
hoelzl@36624
    51
      assume *: "s - {i. a i = 0} = {}"
hoelzl@36624
    52
      hence "setsum a (s - {i. a i = 0}) = 0" by (simp add: * setsum_empty)
hoelzl@36624
    53
      with sum_1 show False by simp
hoelzl@38656
    54
    qed
hoelzl@36624
    55
hoelzl@36624
    56
    fix i assume "i \<in> s - {i. a i = 0}"
hoelzl@36624
    57
    hence "i \<in> s" "a i \<noteq> 0" by simp_all
hoelzl@36624
    58
    thus "0 \<le> a i" "y i \<in> {0<..}" using pos[of i] by auto
hoelzl@36624
    59
  qed fact+
hoelzl@36624
    60
  ultimately show ?thesis by simp
hoelzl@36080
    61
qed
hoelzl@36080
    62
hoelzl@36624
    63
lemma log_setsum_divide:
hoelzl@36624
    64
  assumes "finite S" and "S \<noteq> {}" and "1 < b"
hoelzl@36624
    65
  assumes "(\<Sum>x\<in>S. g x) = 1"
hoelzl@36624
    66
  assumes pos: "\<And>x. x \<in> S \<Longrightarrow> g x \<ge> 0" "\<And>x. x \<in> S \<Longrightarrow> f x \<ge> 0"
hoelzl@36624
    67
  assumes g_pos: "\<And>x. \<lbrakk> x \<in> S ; 0 < g x \<rbrakk> \<Longrightarrow> 0 < f x"
hoelzl@36624
    68
  shows "- (\<Sum>x\<in>S. g x * log b (g x / f x)) \<le> log b (\<Sum>x\<in>S. f x)"
hoelzl@36624
    69
proof -
hoelzl@36624
    70
  have log_mono: "\<And>x y. 0 < x \<Longrightarrow> x \<le> y \<Longrightarrow> log b x \<le> log b y"
hoelzl@36624
    71
    using `1 < b` by (subst log_le_cancel_iff) auto
hoelzl@36080
    72
hoelzl@36624
    73
  have "- (\<Sum>x\<in>S. g x * log b (g x / f x)) = (\<Sum>x\<in>S. g x * log b (f x / g x))"
hoelzl@36624
    74
  proof (unfold setsum_negf[symmetric], rule setsum_cong)
hoelzl@36624
    75
    fix x assume x: "x \<in> S"
hoelzl@36624
    76
    show "- (g x * log b (g x / f x)) = g x * log b (f x / g x)"
hoelzl@36624
    77
    proof (cases "g x = 0")
hoelzl@36624
    78
      case False
hoelzl@36624
    79
      with pos[OF x] g_pos[OF x] have "0 < f x" "0 < g x" by simp_all
hoelzl@36624
    80
      thus ?thesis using `1 < b` by (simp add: log_divide field_simps)
hoelzl@36624
    81
    qed simp
hoelzl@36624
    82
  qed rule
hoelzl@36624
    83
  also have "... \<le> log b (\<Sum>x\<in>S. g x * (f x / g x))"
hoelzl@36624
    84
  proof (rule log_setsum')
hoelzl@36624
    85
    fix x assume x: "x \<in> S" "0 < g x"
hoelzl@36624
    86
    with g_pos[OF x] show "0 < f x / g x" by (safe intro!: divide_pos_pos)
hoelzl@36624
    87
  qed fact+
hoelzl@36624
    88
  also have "... = log b (\<Sum>x\<in>S - {x. g x = 0}. f x)" using `finite S`
hoelzl@36624
    89
    by (auto intro!: setsum_mono_zero_cong_right arg_cong[where f="log b"]
hoelzl@36624
    90
        split: split_if_asm)
hoelzl@36624
    91
  also have "... \<le> log b (\<Sum>x\<in>S. f x)"
hoelzl@36624
    92
  proof (rule log_mono)
hoelzl@36624
    93
    have "0 = (\<Sum>x\<in>S - {x. g x = 0}. 0)" by simp
hoelzl@36624
    94
    also have "... < (\<Sum>x\<in>S - {x. g x = 0}. f x)" (is "_ < ?sum")
hoelzl@36624
    95
    proof (rule setsum_strict_mono)
hoelzl@36624
    96
      show "finite (S - {x. g x = 0})" using `finite S` by simp
hoelzl@36624
    97
      show "S - {x. g x = 0} \<noteq> {}"
hoelzl@36624
    98
      proof
hoelzl@36624
    99
        assume "S - {x. g x = 0} = {}"
hoelzl@36624
   100
        hence "(\<Sum>x\<in>S. g x) = 0" by (subst setsum_0') auto
hoelzl@36624
   101
        with `(\<Sum>x\<in>S. g x) = 1` show False by simp
hoelzl@36624
   102
      qed
hoelzl@36624
   103
      fix x assume "x \<in> S - {x. g x = 0}"
hoelzl@36624
   104
      thus "0 < f x" using g_pos[of x] pos(1)[of x] by auto
hoelzl@36624
   105
    qed
hoelzl@36624
   106
    finally show "0 < ?sum" .
hoelzl@36624
   107
    show "(\<Sum>x\<in>S - {x. g x = 0}. f x) \<le> (\<Sum>x\<in>S. f x)"
hoelzl@36624
   108
      using `finite S` pos by (auto intro!: setsum_mono2)
hoelzl@36080
   109
  qed
hoelzl@36624
   110
  finally show ?thesis .
hoelzl@36080
   111
qed
hoelzl@36080
   112
hoelzl@39097
   113
lemma split_pairs:
hoelzl@40859
   114
  "((A, B) = X) \<longleftrightarrow> (fst X = A \<and> snd X = B)" and
hoelzl@40859
   115
  "(X = (A, B)) \<longleftrightarrow> (fst X = A \<and> snd X = B)" by auto
hoelzl@38656
   116
hoelzl@38656
   117
section "Information theory"
hoelzl@38656
   118
hoelzl@40859
   119
locale information_space = prob_space +
hoelzl@38656
   120
  fixes b :: real assumes b_gt_1: "1 < b"
hoelzl@38656
   121
hoelzl@40859
   122
context information_space
hoelzl@38656
   123
begin
hoelzl@38656
   124
hoelzl@40859
   125
text {* Introduce some simplification rules for logarithm of base @{term b}. *}
hoelzl@40859
   126
hoelzl@40859
   127
lemma log_neg_const:
hoelzl@40859
   128
  assumes "x \<le> 0"
hoelzl@40859
   129
  shows "log b x = log b 0"
hoelzl@36624
   130
proof -
hoelzl@40859
   131
  { fix u :: real
hoelzl@40859
   132
    have "x \<le> 0" by fact
hoelzl@40859
   133
    also have "0 < exp u"
hoelzl@40859
   134
      using exp_gt_zero .
hoelzl@40859
   135
    finally have "exp u \<noteq> x"
hoelzl@40859
   136
      by auto }
hoelzl@40859
   137
  then show "log b x = log b 0"
hoelzl@40859
   138
    by (simp add: log_def ln_def)
hoelzl@38656
   139
qed
hoelzl@38656
   140
hoelzl@40859
   141
lemma log_mult_eq:
hoelzl@40859
   142
  "log b (A * B) = (if 0 < A * B then log b \<bar>A\<bar> + log b \<bar>B\<bar> else log b 0)"
hoelzl@40859
   143
  using log_mult[of b "\<bar>A\<bar>" "\<bar>B\<bar>"] b_gt_1 log_neg_const[of "A * B"]
hoelzl@40859
   144
  by (auto simp: zero_less_mult_iff mult_le_0_iff)
hoelzl@38656
   145
hoelzl@40859
   146
lemma log_inverse_eq:
hoelzl@40859
   147
  "log b (inverse B) = (if 0 < B then - log b B else log b 0)"
hoelzl@40859
   148
  using log_inverse[of b B] log_neg_const[of "inverse B"] b_gt_1 by simp
hoelzl@36080
   149
hoelzl@40859
   150
lemma log_divide_eq:
hoelzl@40859
   151
  "log b (A / B) = (if 0 < A * B then log b \<bar>A\<bar> - log b \<bar>B\<bar> else log b 0)"
hoelzl@40859
   152
  unfolding divide_inverse log_mult_eq log_inverse_eq abs_inverse
hoelzl@40859
   153
  by (auto simp: zero_less_mult_iff mult_le_0_iff)
hoelzl@38656
   154
hoelzl@40859
   155
lemmas log_simps = log_mult_eq log_inverse_eq log_divide_eq
hoelzl@38656
   156
hoelzl@38656
   157
end
hoelzl@38656
   158
hoelzl@39097
   159
subsection "Kullback$-$Leibler divergence"
hoelzl@36080
   160
hoelzl@39097
   161
text {* The Kullback$-$Leibler divergence is also known as relative entropy or
hoelzl@39097
   162
Kullback$-$Leibler distance. *}
hoelzl@39097
   163
hoelzl@39097
   164
definition
hoelzl@39097
   165
  "KL_divergence b M \<mu> \<nu> =
hoelzl@39097
   166
    measure_space.integral M \<mu> (\<lambda>x. log b (real (sigma_finite_measure.RN_deriv M \<nu> \<mu> x)))"
hoelzl@38656
   167
hoelzl@40859
   168
lemma (in sigma_finite_measure) KL_divergence_cong:
hoelzl@40859
   169
  assumes "measure_space M \<nu>"
hoelzl@40859
   170
  and cong: "\<And>A. A \<in> sets M \<Longrightarrow> \<mu>' A = \<mu> A" "\<And>A. A \<in> sets M \<Longrightarrow> \<nu>' A = \<nu> A"
hoelzl@40859
   171
  shows "KL_divergence b M \<nu>' \<mu>' = KL_divergence b M \<nu> \<mu>"
hoelzl@40859
   172
proof -
hoelzl@40859
   173
  interpret \<nu>: measure_space M \<nu> by fact
hoelzl@40859
   174
  show ?thesis
hoelzl@40859
   175
    unfolding KL_divergence_def
hoelzl@40859
   176
    using RN_deriv_cong[OF cong, of "\<lambda>A. A"]
hoelzl@40859
   177
    by (simp add: cong \<nu>.integral_cong_measure[OF cong(2)])
hoelzl@40859
   178
qed
hoelzl@40859
   179
hoelzl@40859
   180
lemma (in sigma_finite_measure) KL_divergence_vimage:
hoelzl@40859
   181
  assumes f: "bij_betw f S (space M)"
hoelzl@40859
   182
  assumes \<nu>: "measure_space M \<nu>" "absolutely_continuous \<nu>"
hoelzl@40859
   183
  shows "KL_divergence b (vimage_algebra S f) (\<lambda>A. \<nu> (f ` A)) (\<lambda>A. \<mu> (f ` A)) = KL_divergence b M \<nu> \<mu>"
hoelzl@40859
   184
    (is "KL_divergence b ?M ?\<nu> ?\<mu> = _")
hoelzl@40859
   185
proof -
hoelzl@40859
   186
  interpret \<nu>: measure_space M \<nu> by fact
hoelzl@40859
   187
  interpret v: measure_space ?M ?\<nu>
hoelzl@40859
   188
    using f by (rule \<nu>.measure_space_isomorphic)
hoelzl@40859
   189
hoelzl@40859
   190
  let ?RN = "sigma_finite_measure.RN_deriv ?M ?\<mu> ?\<nu>"
hoelzl@40859
   191
  from RN_deriv_vimage[OF f \<nu>]
hoelzl@40859
   192
  have *: "\<nu>.almost_everywhere (\<lambda>x. ?RN (the_inv_into S f x) = RN_deriv \<nu> x)"
hoelzl@40859
   193
    by (rule absolutely_continuous_AE[OF \<nu>])
hoelzl@40859
   194
hoelzl@40859
   195
  show ?thesis
hoelzl@40859
   196
    unfolding KL_divergence_def \<nu>.integral_vimage_inv[OF f]
hoelzl@40859
   197
    apply (rule \<nu>.integral_cong_AE)
hoelzl@40859
   198
    apply (rule \<nu>.AE_mp[OF *])
hoelzl@40859
   199
    apply (rule \<nu>.AE_cong)
hoelzl@40859
   200
    apply simp
hoelzl@40859
   201
    done
hoelzl@40859
   202
qed
hoelzl@40859
   203
hoelzl@38656
   204
lemma (in finite_measure_space) KL_divergence_eq_finite:
hoelzl@38656
   205
  assumes v: "finite_measure_space M \<nu>"
hoelzl@40859
   206
  assumes ac: "absolutely_continuous \<nu>"
hoelzl@38656
   207
  shows "KL_divergence b M \<nu> \<mu> = (\<Sum>x\<in>space M. real (\<nu> {x}) * log b (real (\<nu> {x}) / real (\<mu> {x})))" (is "_ = ?sum")
hoelzl@38656
   208
proof (simp add: KL_divergence_def finite_measure_space.integral_finite_singleton[OF v])
hoelzl@38656
   209
  interpret v: finite_measure_space M \<nu> by fact
hoelzl@38656
   210
  have ms: "measure_space M \<nu>" by fact
hoelzl@38656
   211
  show "(\<Sum>x \<in> space M. log b (real (RN_deriv \<nu> x)) * real (\<nu> {x})) = ?sum"
hoelzl@38656
   212
    using RN_deriv_finite_measure[OF ms ac]
hoelzl@38656
   213
    by (auto intro!: setsum_cong simp: field_simps real_of_pinfreal_mult[symmetric])
hoelzl@38656
   214
qed
hoelzl@36080
   215
hoelzl@38656
   216
lemma (in finite_prob_space) KL_divergence_positive_finite:
hoelzl@38656
   217
  assumes v: "finite_prob_space M \<nu>"
hoelzl@40859
   218
  assumes ac: "absolutely_continuous \<nu>"
hoelzl@38656
   219
  and "1 < b"
hoelzl@38656
   220
  shows "0 \<le> KL_divergence b M \<nu> \<mu>"
hoelzl@38656
   221
proof -
hoelzl@38656
   222
  interpret v: finite_prob_space M \<nu> using v .
hoelzl@40859
   223
  have ms: "finite_measure_space M \<nu>" by default
hoelzl@38656
   224
hoelzl@40859
   225
  have "- (KL_divergence b M \<nu> \<mu>) \<le> log b (\<Sum>x\<in>space M. real (\<mu> {x}))"
hoelzl@40859
   226
  proof (subst KL_divergence_eq_finite[OF ms ac], safe intro!: log_setsum_divide not_empty)
hoelzl@40859
   227
    show "finite (space M)" using finite_space by simp
hoelzl@40859
   228
    show "1 < b" by fact
hoelzl@40859
   229
    show "(\<Sum>x\<in>space M. real (\<nu> {x})) = 1" using v.finite_sum_over_space_eq_1 by simp
hoelzl@38656
   230
hoelzl@40859
   231
    fix x assume "x \<in> space M"
hoelzl@40859
   232
    then have x: "{x} \<in> sets M" unfolding sets_eq_Pow by auto
hoelzl@40859
   233
    { assume "0 < real (\<nu> {x})"
hoelzl@40859
   234
      then have "\<nu> {x} \<noteq> 0" by auto
hoelzl@40859
   235
      then have "\<mu> {x} \<noteq> 0"
hoelzl@40859
   236
        using ac[unfolded absolutely_continuous_def, THEN bspec, of "{x}"] x by auto
hoelzl@40859
   237
      thus "0 < prob {x}" using finite_measure[of "{x}"] x by auto }
hoelzl@40859
   238
  qed auto
hoelzl@38656
   239
  thus "0 \<le> KL_divergence b M \<nu> \<mu>" using finite_sum_over_space_eq_1 by simp
hoelzl@36080
   240
qed
hoelzl@36080
   241
hoelzl@39097
   242
subsection {* Mutual Information *}
hoelzl@39097
   243
hoelzl@36080
   244
definition (in prob_space)
hoelzl@38656
   245
  "mutual_information b S T X Y =
hoelzl@40859
   246
    KL_divergence b (sigma (pair_algebra S T))
hoelzl@38656
   247
      (joint_distribution X Y)
hoelzl@40859
   248
      (pair_sigma_finite.pair_measure S (distribution X) T (distribution Y))"
hoelzl@36080
   249
hoelzl@40859
   250
definition (in prob_space)
hoelzl@40859
   251
  "entropy b s X = mutual_information b s s X X"
hoelzl@40859
   252
hoelzl@40859
   253
abbreviation (in information_space)
hoelzl@40859
   254
  mutual_information_Pow ("\<I>'(_ ; _')") where
hoelzl@36624
   255
  "\<I>(X ; Y) \<equiv> mutual_information b
hoelzl@36080
   256
    \<lparr> space = X`space M, sets = Pow (X`space M) \<rparr>
hoelzl@36080
   257
    \<lparr> space = Y`space M, sets = Pow (Y`space M) \<rparr> X Y"
hoelzl@36080
   258
hoelzl@40859
   259
lemma (in information_space) mutual_information_commute_generic:
hoelzl@40859
   260
  assumes X: "random_variable S X" and Y: "random_variable T Y"
hoelzl@40859
   261
  assumes ac: "measure_space.absolutely_continuous (sigma (pair_algebra S T))
hoelzl@40859
   262
   (pair_sigma_finite.pair_measure S (distribution X) T (distribution Y)) (joint_distribution X Y)"
hoelzl@40859
   263
  shows "mutual_information b S T X Y = mutual_information b T S Y X"
hoelzl@39092
   264
proof -
hoelzl@40859
   265
  interpret P: prob_space "sigma (pair_algebra S T)" "joint_distribution X Y"
hoelzl@40859
   266
    using random_variable_pairI[OF X Y] by (rule distribution_prob_space)
hoelzl@40859
   267
  interpret Q: prob_space "sigma (pair_algebra T S)" "joint_distribution Y X"
hoelzl@40859
   268
    using random_variable_pairI[OF Y X] by (rule distribution_prob_space)
hoelzl@40859
   269
  interpret X: prob_space S "distribution X" using X by (rule distribution_prob_space)
hoelzl@40859
   270
  interpret Y: prob_space T "distribution Y" using Y by (rule distribution_prob_space)
hoelzl@40859
   271
  interpret ST: pair_sigma_finite S "distribution X" T "distribution Y" by default
hoelzl@40859
   272
  interpret TS: pair_sigma_finite T "distribution Y" S "distribution X" by default
hoelzl@40859
   273
hoelzl@40859
   274
  have ST: "measure_space (sigma (pair_algebra S T)) (joint_distribution X Y)" by default
hoelzl@40859
   275
  have TS: "measure_space (sigma (pair_algebra T S)) (joint_distribution Y X)" by default
hoelzl@40859
   276
hoelzl@40859
   277
  have bij_ST: "bij_betw (\<lambda>(x, y). (y, x)) (space (sigma (pair_algebra S T))) (space (sigma (pair_algebra T S)))"
hoelzl@40859
   278
    by (auto intro!: inj_onI simp: space_pair_algebra bij_betw_def)
hoelzl@40859
   279
  have bij_TS: "bij_betw (\<lambda>(x, y). (y, x)) (space (sigma (pair_algebra T S))) (space (sigma (pair_algebra S T)))"
hoelzl@40859
   280
    by (auto intro!: inj_onI simp: space_pair_algebra bij_betw_def)
hoelzl@40859
   281
hoelzl@40859
   282
  { fix A
hoelzl@40859
   283
    have "joint_distribution X Y ((\<lambda>(x, y). (y, x)) ` A) = joint_distribution Y X A"
hoelzl@40859
   284
      unfolding distribution_def by (auto intro!: arg_cong[where f=\<mu>]) }
hoelzl@40859
   285
  note jd_commute = this
hoelzl@40859
   286
hoelzl@40859
   287
  { fix A assume A: "A \<in> sets (sigma (pair_algebra T S))"
hoelzl@40859
   288
    have *: "\<And>x y. indicator ((\<lambda>(x, y). (y, x)) ` A) (x, y) = (indicator A (y, x) :: pinfreal)"
hoelzl@40859
   289
      unfolding indicator_def by auto
hoelzl@40859
   290
    have "ST.pair_measure ((\<lambda>(x, y). (y, x)) ` A) = TS.pair_measure A"
hoelzl@40859
   291
      unfolding ST.pair_measure_def TS.pair_measure_def
hoelzl@40859
   292
      using A by (auto simp add: TS.Fubini[symmetric] *) }
hoelzl@40859
   293
  note pair_measure_commute = this
hoelzl@40859
   294
hoelzl@39092
   295
  show ?thesis
hoelzl@40859
   296
    unfolding mutual_information_def
hoelzl@40859
   297
    unfolding ST.KL_divergence_vimage[OF bij_TS ST ac, symmetric]
hoelzl@40859
   298
    unfolding space_sigma space_pair_algebra jd_commute
hoelzl@40859
   299
    unfolding ST.pair_sigma_algebra_swap[symmetric]
hoelzl@40859
   300
    by (simp cong: TS.KL_divergence_cong[OF TS] add: pair_measure_commute)
hoelzl@39092
   301
qed
hoelzl@39092
   302
hoelzl@40859
   303
lemma (in prob_space) finite_variables_absolutely_continuous:
hoelzl@40859
   304
  assumes X: "finite_random_variable S X" and Y: "finite_random_variable T Y"
hoelzl@40859
   305
  shows "measure_space.absolutely_continuous (sigma (pair_algebra S T))
hoelzl@40859
   306
   (pair_sigma_finite.pair_measure S (distribution X) T (distribution Y)) (joint_distribution X Y)"
hoelzl@40859
   307
proof -
hoelzl@40859
   308
  interpret X: finite_prob_space S "distribution X" using X by (rule distribution_finite_prob_space)
hoelzl@40859
   309
  interpret Y: finite_prob_space T "distribution Y" using Y by (rule distribution_finite_prob_space)
hoelzl@40859
   310
  interpret XY: pair_finite_prob_space S "distribution X" T "distribution Y" by default
hoelzl@40859
   311
  interpret P: finite_prob_space XY.P "joint_distribution X Y"
hoelzl@40859
   312
    using assms by (intro joint_distribution_finite_prob_space)
hoelzl@40859
   313
  show "XY.absolutely_continuous (joint_distribution X Y)"
hoelzl@40859
   314
  proof (rule XY.absolutely_continuousI)
hoelzl@40859
   315
    show "finite_measure_space XY.P (joint_distribution X Y)" by default
hoelzl@40859
   316
    fix x assume "x \<in> space XY.P" and "XY.pair_measure {x} = 0"
hoelzl@40859
   317
    then obtain a b where "(a, b) = x" and "a \<in> space S" "b \<in> space T"
hoelzl@40859
   318
      and distr: "distribution X {a} * distribution Y {b} = 0"
hoelzl@40859
   319
      by (cases x) (auto simp: pair_algebra_def)
hoelzl@40859
   320
    with assms[THEN finite_random_variableD]
hoelzl@40859
   321
      joint_distribution_Times_le_fst[of S X T Y "{a}" "{b}"]
hoelzl@40859
   322
      joint_distribution_Times_le_snd[of S X T Y "{a}" "{b}"]
hoelzl@40859
   323
    have "joint_distribution X Y {x} \<le> distribution Y {b}"
hoelzl@40859
   324
         "joint_distribution X Y {x} \<le> distribution X {a}"
hoelzl@40859
   325
      by auto
hoelzl@40859
   326
    with distr show "joint_distribution X Y {x} = 0" by auto
hoelzl@40859
   327
  qed
hoelzl@40859
   328
qed
hoelzl@40859
   329
hoelzl@40859
   330
lemma (in information_space) mutual_information_commute:
hoelzl@40859
   331
  assumes X: "finite_random_variable S X" and Y: "finite_random_variable T Y"
hoelzl@40859
   332
  shows "mutual_information b S T X Y = mutual_information b T S Y X"
hoelzl@40859
   333
  by (intro finite_random_variableD X Y mutual_information_commute_generic finite_variables_absolutely_continuous)
hoelzl@40859
   334
hoelzl@40859
   335
lemma (in information_space) mutual_information_commute_simple:
hoelzl@40859
   336
  assumes X: "simple_function X" and Y: "simple_function Y"
hoelzl@40859
   337
  shows "\<I>(X;Y) = \<I>(Y;X)"
hoelzl@40859
   338
  by (intro X Y simple_function_imp_finite_random_variable mutual_information_commute)
hoelzl@40859
   339
hoelzl@40859
   340
lemma (in information_space)
hoelzl@40859
   341
  assumes MX: "finite_random_variable MX X"
hoelzl@40859
   342
  assumes MY: "finite_random_variable MY Y"
hoelzl@40859
   343
  shows mutual_information_generic_eq:
hoelzl@36624
   344
    "mutual_information b MX MY X Y = (\<Sum> (x,y) \<in> space MX \<times> space MY.
hoelzl@38656
   345
      real (joint_distribution X Y {(x,y)}) *
hoelzl@38656
   346
      log b (real (joint_distribution X Y {(x,y)}) /
hoelzl@38656
   347
      (real (distribution X {x}) * real (distribution Y {y}))))"
hoelzl@40859
   348
    (is ?sum)
hoelzl@36624
   349
  and mutual_information_positive_generic:
hoelzl@40859
   350
     "0 \<le> mutual_information b MX MY X Y" (is ?positive)
hoelzl@36624
   351
proof -
hoelzl@40859
   352
  interpret X: finite_prob_space MX "distribution X" using MX by (rule distribution_finite_prob_space)
hoelzl@40859
   353
  interpret Y: finite_prob_space MY "distribution Y" using MY by (rule distribution_finite_prob_space)
hoelzl@40859
   354
  interpret XY: pair_finite_prob_space MX "distribution X" MY "distribution Y" by default
hoelzl@40859
   355
  interpret P: finite_prob_space XY.P "joint_distribution X Y"
hoelzl@40859
   356
    using assms by (intro joint_distribution_finite_prob_space)
hoelzl@36080
   357
hoelzl@40859
   358
  have P_ms: "finite_measure_space XY.P (joint_distribution X Y)" by default
hoelzl@40859
   359
  have P_ps: "finite_prob_space XY.P (joint_distribution X Y)" by default
hoelzl@36624
   360
hoelzl@40859
   361
  show ?sum
hoelzl@38656
   362
    unfolding Let_def mutual_information_def
hoelzl@40859
   363
    by (subst XY.KL_divergence_eq_finite[OF P_ms finite_variables_absolutely_continuous[OF MX MY]])
hoelzl@40859
   364
       (auto simp add: pair_algebra_def setsum_cartesian_product' real_of_pinfreal_mult[symmetric])
hoelzl@36080
   365
hoelzl@36624
   366
  show ?positive
hoelzl@40859
   367
    using XY.KL_divergence_positive_finite[OF P_ps finite_variables_absolutely_continuous[OF MX MY] b_gt_1]
hoelzl@40859
   368
    unfolding mutual_information_def .
hoelzl@36080
   369
qed
hoelzl@36080
   370
hoelzl@40859
   371
lemma (in information_space) mutual_information_eq:
hoelzl@40859
   372
  assumes "simple_function X" "simple_function Y"
hoelzl@40859
   373
  shows "\<I>(X;Y) = (\<Sum> (x,y) \<in> X ` space M \<times> Y ` space M.
hoelzl@38656
   374
    real (distribution (\<lambda>x. (X x, Y x)) {(x,y)}) * log b (real (distribution (\<lambda>x. (X x, Y x)) {(x,y)}) /
hoelzl@38656
   375
                                                   (real (distribution X {x}) * real (distribution Y {y}))))"
hoelzl@40859
   376
  using assms by (simp add: mutual_information_generic_eq)
hoelzl@36080
   377
hoelzl@40859
   378
lemma (in information_space) mutual_information_generic_cong:
hoelzl@39097
   379
  assumes X: "\<And>x. x \<in> space M \<Longrightarrow> X x = X' x"
hoelzl@39097
   380
  assumes Y: "\<And>x. x \<in> space M \<Longrightarrow> Y x = Y' x"
hoelzl@40859
   381
  shows "mutual_information b MX MY X Y = mutual_information b MX MY X' Y'"
hoelzl@40859
   382
  unfolding mutual_information_def using X Y
hoelzl@40859
   383
  by (simp cong: distribution_cong)
hoelzl@39097
   384
hoelzl@40859
   385
lemma (in information_space) mutual_information_cong:
hoelzl@40859
   386
  assumes X: "\<And>x. x \<in> space M \<Longrightarrow> X x = X' x"
hoelzl@40859
   387
  assumes Y: "\<And>x. x \<in> space M \<Longrightarrow> Y x = Y' x"
hoelzl@40859
   388
  shows "\<I>(X; Y) = \<I>(X'; Y')"
hoelzl@40859
   389
  unfolding mutual_information_def using X Y
hoelzl@40859
   390
  by (simp cong: distribution_cong image_cong)
hoelzl@40859
   391
hoelzl@40859
   392
lemma (in information_space) mutual_information_positive:
hoelzl@40859
   393
  assumes "simple_function X" "simple_function Y"
hoelzl@40859
   394
  shows "0 \<le> \<I>(X;Y)"
hoelzl@40859
   395
  using assms by (simp add: mutual_information_positive_generic)
hoelzl@36080
   396
hoelzl@39097
   397
subsection {* Entropy *}
hoelzl@39097
   398
hoelzl@40859
   399
abbreviation (in information_space)
hoelzl@40859
   400
  entropy_Pow ("\<H>'(_')") where
hoelzl@36624
   401
  "\<H>(X) \<equiv> entropy b \<lparr> space = X`space M, sets = Pow (X`space M) \<rparr> X"
hoelzl@36080
   402
hoelzl@40859
   403
lemma (in information_space) entropy_generic_eq:
hoelzl@40859
   404
  assumes MX: "finite_random_variable MX X"
hoelzl@39097
   405
  shows "entropy b MX X = -(\<Sum> x \<in> space MX. real (distribution X {x}) * log b (real (distribution X {x})))"
hoelzl@39097
   406
proof -
hoelzl@40859
   407
  interpret MX: finite_prob_space MX "distribution X" using MX by (rule distribution_finite_prob_space)
hoelzl@39097
   408
  let "?X x" = "real (distribution X {x})"
hoelzl@39097
   409
  let "?XX x y" = "real (joint_distribution X X {(x, y)})"
hoelzl@39097
   410
  { fix x y
hoelzl@39097
   411
    have "(\<lambda>x. (X x, X x)) -` {(x, y)} = (if x = y then X -` {x} else {})" by auto
hoelzl@39097
   412
    then have "?XX x y * log b (?XX x y / (?X x * ?X y)) =
hoelzl@39097
   413
        (if x = y then - ?X y * log b (?X y) else 0)"
hoelzl@40859
   414
      unfolding distribution_def by (auto simp: log_simps zero_less_mult_iff) }
hoelzl@39097
   415
  note remove_XX = this
hoelzl@39097
   416
  show ?thesis
hoelzl@39097
   417
    unfolding entropy_def mutual_information_generic_eq[OF MX MX]
hoelzl@39097
   418
    unfolding setsum_cartesian_product[symmetric] setsum_negf[symmetric] remove_XX
hoelzl@39097
   419
    by (auto simp: setsum_cases MX.finite_space)
hoelzl@39097
   420
qed
hoelzl@36624
   421
hoelzl@40859
   422
lemma (in information_space) entropy_eq:
hoelzl@40859
   423
  assumes "simple_function X"
hoelzl@40859
   424
  shows "\<H>(X) = -(\<Sum> x \<in> X ` space M. real (distribution X {x}) * log b (real (distribution X {x})))"
hoelzl@40859
   425
  using assms by (simp add: entropy_generic_eq)
hoelzl@36080
   426
hoelzl@40859
   427
lemma (in information_space) entropy_positive:
hoelzl@40859
   428
  "simple_function X \<Longrightarrow> 0 \<le> \<H>(X)"
hoelzl@40859
   429
  unfolding entropy_def by (simp add: mutual_information_positive)
hoelzl@36080
   430
hoelzl@40859
   431
lemma (in information_space) entropy_certainty_eq_0:
hoelzl@40859
   432
  assumes "simple_function X" and "x \<in> X ` space M" and "distribution X {x} = 1"
hoelzl@39097
   433
  shows "\<H>(X) = 0"
hoelzl@39097
   434
proof -
hoelzl@39097
   435
  interpret X: finite_prob_space "\<lparr> space = X ` space M, sets = Pow (X ` space M) \<rparr>" "distribution X"
hoelzl@40859
   436
    using simple_function_imp_finite_random_variable[OF `simple_function X`]
hoelzl@40859
   437
    by (rule distribution_finite_prob_space)
hoelzl@39097
   438
  have "distribution X (X ` space M - {x}) = distribution X (X ` space M) - distribution X {x}"
hoelzl@39097
   439
    using X.measure_compl[of "{x}"] assms by auto
hoelzl@39097
   440
  also have "\<dots> = 0" using X.prob_space assms by auto
hoelzl@39097
   441
  finally have X0: "distribution X (X ` space M - {x}) = 0" by auto
hoelzl@39097
   442
  { fix y assume asm: "y \<noteq> x" "y \<in> X ` space M"
hoelzl@39097
   443
    hence "{y} \<subseteq> X ` space M - {x}" by auto
hoelzl@39097
   444
    from X.measure_mono[OF this] X0 asm
hoelzl@39097
   445
    have "distribution X {y} = 0" by auto }
hoelzl@39097
   446
  hence fi: "\<And> y. y \<in> X ` space M \<Longrightarrow> real (distribution X {y}) = (if x = y then 1 else 0)"
hoelzl@39097
   447
    using assms by auto
hoelzl@39097
   448
  have y: "\<And>y. (if x = y then 1 else 0) * log b (if x = y then 1 else 0) = 0" by simp
hoelzl@40859
   449
  show ?thesis unfolding entropy_eq[OF `simple_function X`] by (auto simp: y fi)
hoelzl@39097
   450
qed
hoelzl@39097
   451
hoelzl@40859
   452
lemma (in information_space) entropy_le_card_not_0:
hoelzl@40859
   453
  assumes "simple_function X"
hoelzl@40859
   454
  shows "\<H>(X) \<le> log b (real (card (X ` space M \<inter> {x . distribution X {x} \<noteq> 0})))"
hoelzl@39097
   455
proof -
hoelzl@39097
   456
  let "?d x" = "distribution X {x}"
hoelzl@39097
   457
  let "?p x" = "real (?d x)"
hoelzl@39097
   458
  have "\<H>(X) = (\<Sum>x\<in>X`space M. ?p x * log b (1 / ?p x))"
hoelzl@40859
   459
    by (auto intro!: setsum_cong simp: entropy_eq[OF `simple_function X`] setsum_negf[symmetric] log_simps not_less)
hoelzl@39097
   460
  also have "\<dots> \<le> log b (\<Sum>x\<in>X`space M. ?p x * (1 / ?p x))"
hoelzl@39097
   461
    apply (rule log_setsum')
hoelzl@40859
   462
    using not_empty b_gt_1 `simple_function X` sum_over_space_real_distribution
hoelzl@40859
   463
    by (auto simp: simple_function_def)
hoelzl@39097
   464
  also have "\<dots> = log b (\<Sum>x\<in>X`space M. if ?d x \<noteq> 0 then 1 else 0)"
hoelzl@40859
   465
    using distribution_finite[OF `simple_function X`[THEN simple_function_imp_random_variable], simplified]
hoelzl@40859
   466
    by (intro arg_cong[where f="\<lambda>X. log b X"] setsum_cong) (auto simp: real_of_pinfreal_eq_0)
hoelzl@39097
   467
  finally show ?thesis
hoelzl@40859
   468
    using `simple_function X` by (auto simp: setsum_cases real_eq_of_nat simple_function_def)
hoelzl@39097
   469
qed
hoelzl@39097
   470
hoelzl@40859
   471
lemma (in information_space) entropy_uniform_max:
hoelzl@40859
   472
  assumes "simple_function X"
hoelzl@39097
   473
  assumes "\<And>x y. \<lbrakk> x \<in> X ` space M ; y \<in> X ` space M \<rbrakk> \<Longrightarrow> distribution X {x} = distribution X {y}"
hoelzl@39097
   474
  shows "\<H>(X) = log b (real (card (X ` space M)))"
hoelzl@39097
   475
proof -
hoelzl@40859
   476
  interpret X: finite_prob_space "\<lparr> space = X ` space M, sets = Pow (X ` space M) \<rparr>" "distribution X"
hoelzl@40859
   477
    using simple_function_imp_finite_random_variable[OF `simple_function X`]
hoelzl@40859
   478
    by (rule distribution_finite_prob_space)
hoelzl@39097
   479
  have card_gt0: "0 < card (X ` space M)" unfolding card_gt_0_iff
hoelzl@40859
   480
    using `simple_function X` not_empty by (auto simp: simple_function_def)
hoelzl@39097
   481
  { fix x assume "x \<in> X ` space M"
hoelzl@39097
   482
    hence "real (distribution X {x}) = 1 / real (card (X ` space M))"
hoelzl@40859
   483
    proof (rule X.uniform_prob[simplified])
hoelzl@39097
   484
      fix x y assume "x \<in> X`space M" "y \<in> X`space M"
hoelzl@40859
   485
      from assms(2)[OF this] show "real (distribution X {x}) = real (distribution X {y})" by simp
hoelzl@39097
   486
    qed }
hoelzl@39097
   487
  thus ?thesis
hoelzl@40859
   488
    using not_empty X.finite_space b_gt_1 card_gt0
hoelzl@40859
   489
    by (simp add: entropy_eq[OF `simple_function X`] real_eq_of_nat[symmetric] log_simps)
hoelzl@39097
   490
qed
hoelzl@39097
   491
hoelzl@40859
   492
lemma (in information_space) entropy_le_card:
hoelzl@40859
   493
  assumes "simple_function X"
hoelzl@40859
   494
  shows "\<H>(X) \<le> log b (real (card (X ` space M)))"
hoelzl@39097
   495
proof cases
hoelzl@39097
   496
  assume "X ` space M \<inter> {x. distribution X {x} \<noteq> 0} = {}"
hoelzl@39097
   497
  then have "\<And>x. x\<in>X`space M \<Longrightarrow> distribution X {x} = 0" by auto
hoelzl@39097
   498
  moreover
hoelzl@39097
   499
  have "0 < card (X`space M)"
hoelzl@40859
   500
    using `simple_function X` not_empty
hoelzl@40859
   501
    by (auto simp: card_gt_0_iff simple_function_def)
hoelzl@39097
   502
  then have "log b 1 \<le> log b (real (card (X`space M)))"
hoelzl@39097
   503
    using b_gt_1 by (intro log_le) auto
hoelzl@40859
   504
  ultimately show ?thesis using assms by (simp add: entropy_eq)
hoelzl@39097
   505
next
hoelzl@39097
   506
  assume False: "X ` space M \<inter> {x. distribution X {x} \<noteq> 0} \<noteq> {}"
hoelzl@39097
   507
  have "card (X ` space M \<inter> {x. distribution X {x} \<noteq> 0}) \<le> card (X ` space M)"
hoelzl@40859
   508
    (is "?A \<le> ?B") using assms not_empty by (auto intro!: card_mono simp: simple_function_def)
hoelzl@40859
   509
  note entropy_le_card_not_0[OF assms]
hoelzl@39097
   510
  also have "log b (real ?A) \<le> log b (real ?B)"
hoelzl@40859
   511
    using b_gt_1 False not_empty `?A \<le> ?B` assms
hoelzl@40859
   512
    by (auto intro!: log_le simp: card_gt_0_iff simp: simple_function_def)
hoelzl@39097
   513
  finally show ?thesis .
hoelzl@39097
   514
qed
hoelzl@39097
   515
hoelzl@40859
   516
lemma (in information_space) entropy_commute:
hoelzl@40859
   517
  assumes "simple_function X" "simple_function Y"
hoelzl@40859
   518
  shows "\<H>(\<lambda>x. (X x, Y x)) = \<H>(\<lambda>x. (Y x, X x))"
hoelzl@39097
   519
proof -
hoelzl@40859
   520
  have sf: "simple_function (\<lambda>x. (X x, Y x))" "simple_function (\<lambda>x. (Y x, X x))"
hoelzl@40859
   521
    using assms by (auto intro: simple_function_Pair)
hoelzl@39097
   522
  have *: "(\<lambda>x. (Y x, X x))`space M = (\<lambda>(a,b). (b,a))`(\<lambda>x. (X x, Y x))`space M"
hoelzl@39097
   523
    by auto
hoelzl@39097
   524
  have inj: "\<And>X. inj_on (\<lambda>(a,b). (b,a)) X"
hoelzl@39097
   525
    by (auto intro!: inj_onI)
hoelzl@39097
   526
  show ?thesis
hoelzl@40859
   527
    unfolding sf[THEN entropy_eq] unfolding * setsum_reindex[OF inj]
hoelzl@39097
   528
    by (simp add: joint_distribution_commute[of Y X] split_beta)
hoelzl@39097
   529
qed
hoelzl@39097
   530
hoelzl@40859
   531
lemma (in information_space) entropy_eq_cartesian_product:
hoelzl@40859
   532
  assumes "simple_function X" "simple_function Y"
hoelzl@40859
   533
  shows "\<H>(\<lambda>x. (X x, Y x)) = -(\<Sum>x\<in>X`space M. \<Sum>y\<in>Y`space M.
hoelzl@39097
   534
    real (joint_distribution X Y {(x,y)}) *
hoelzl@39097
   535
    log b (real (joint_distribution X Y {(x,y)})))"
hoelzl@39097
   536
proof -
hoelzl@40859
   537
  have sf: "simple_function (\<lambda>x. (X x, Y x))"
hoelzl@40859
   538
    using assms by (auto intro: simple_function_Pair)
hoelzl@39097
   539
  { fix x assume "x\<notin>(\<lambda>x. (X x, Y x))`space M"
hoelzl@39097
   540
    then have "(\<lambda>x. (X x, Y x)) -` {x} \<inter> space M = {}" by auto
hoelzl@39097
   541
    then have "joint_distribution X Y {x} = 0"
hoelzl@39097
   542
      unfolding distribution_def by auto }
hoelzl@40859
   543
  then show ?thesis using sf assms
hoelzl@40859
   544
    unfolding entropy_eq[OF sf] neg_equal_iff_equal setsum_cartesian_product
hoelzl@40859
   545
    by (auto intro!: setsum_mono_zero_cong_left simp: simple_function_def)
hoelzl@39097
   546
qed
hoelzl@39097
   547
hoelzl@39097
   548
subsection {* Conditional Mutual Information *}
hoelzl@39097
   549
hoelzl@36080
   550
definition (in prob_space)
hoelzl@38656
   551
  "conditional_mutual_information b M1 M2 M3 X Y Z \<equiv>
hoelzl@40859
   552
    mutual_information b M1 (sigma (pair_algebra M2 M3)) X (\<lambda>x. (Y x, Z x)) -
hoelzl@38656
   553
    mutual_information b M1 M3 X Z"
hoelzl@36080
   554
hoelzl@40859
   555
abbreviation (in information_space)
hoelzl@40859
   556
  conditional_mutual_information_Pow ("\<I>'( _ ; _ | _ ')") where
hoelzl@36624
   557
  "\<I>(X ; Y | Z) \<equiv> conditional_mutual_information b
hoelzl@36080
   558
    \<lparr> space = X`space M, sets = Pow (X`space M) \<rparr>
hoelzl@36080
   559
    \<lparr> space = Y`space M, sets = Pow (Y`space M) \<rparr>
hoelzl@36080
   560
    \<lparr> space = Z`space M, sets = Pow (Z`space M) \<rparr>
hoelzl@36080
   561
    X Y Z"
hoelzl@36080
   562
hoelzl@38656
   563
hoelzl@40859
   564
lemma (in information_space) conditional_mutual_information_generic_eq:
hoelzl@40859
   565
  assumes MX: "finite_random_variable MX X"
hoelzl@40859
   566
    and MY: "finite_random_variable MY Y"
hoelzl@40859
   567
    and MZ: "finite_random_variable MZ Z"
hoelzl@40859
   568
  shows "conditional_mutual_information b MX MY MZ X Y Z = (\<Sum>(x, y, z) \<in> space MX \<times> space MY \<times> space MZ.
hoelzl@38656
   569
             real (distribution (\<lambda>x. (X x, Y x, Z x)) {(x, y, z)}) *
hoelzl@38656
   570
             log b (real (distribution (\<lambda>x. (X x, Y x, Z x)) {(x, y, z)}) /
hoelzl@38656
   571
    (real (joint_distribution X Z {(x, z)}) * real (joint_distribution Y Z {(y,z)} / distribution Z {z}))))"
hoelzl@40859
   572
  (is "_ = (\<Sum>(x, y, z)\<in>?S. ?XYZ x y z * log b (?XYZ x y z / (?XZ x z * ?YZdZ y z)))")
hoelzl@40859
   573
proof -
hoelzl@40859
   574
  let ?YZ = "\<lambda>y z. real (joint_distribution Y Z {(y, z)})"
hoelzl@40859
   575
  let ?X = "\<lambda>x. real (distribution X {x})"
hoelzl@40859
   576
  let ?Z = "\<lambda>z. real (distribution Z {z})"
hoelzl@40859
   577
hoelzl@40859
   578
  txt {* This proof is actually quiet easy, however we need to show that the
hoelzl@40859
   579
    distributions are finite and the joint distributions are zero when one of
hoelzl@40859
   580
    the variables distribution is also zero. *}
hoelzl@40859
   581
hoelzl@40859
   582
  note finite_var = MX MY MZ
hoelzl@40859
   583
  note random_var = finite_var[THEN finite_random_variableD]
hoelzl@40859
   584
hoelzl@40859
   585
  note space_simps = space_pair_algebra space_sigma algebra.simps
hoelzl@40859
   586
hoelzl@40859
   587
  note YZ = finite_random_variable_pairI[OF finite_var(2,3)]
hoelzl@40859
   588
  note XZ = finite_random_variable_pairI[OF finite_var(1,3)]
hoelzl@40859
   589
  note ZX = finite_random_variable_pairI[OF finite_var(3,1)]
hoelzl@40859
   590
  note YZX = finite_random_variable_pairI[OF finite_var(2) ZX]
hoelzl@40859
   591
  note order1 =
hoelzl@40859
   592
    finite_distribution_order(5,6)[OF finite_var(1) YZ, simplified space_simps]
hoelzl@40859
   593
    finite_distribution_order(5,6)[OF finite_var(1,3), simplified space_simps]
hoelzl@40859
   594
hoelzl@40859
   595
  note finite = finite_var(1) YZ finite_var(3) XZ YZX
hoelzl@40859
   596
  note finite[THEN finite_distribution_finite, simplified space_simps, simp]
hoelzl@40859
   597
hoelzl@40859
   598
  have order2: "\<And>x y z. \<lbrakk>x \<in> space MX; y \<in> space MY; z \<in> space MZ; joint_distribution X Z {(x, z)} = 0\<rbrakk>
hoelzl@40859
   599
          \<Longrightarrow> joint_distribution X (\<lambda>x. (Y x, Z x)) {(x, y, z)} = 0"
hoelzl@40859
   600
    unfolding joint_distribution_commute_singleton[of X]
hoelzl@40859
   601
    unfolding joint_distribution_assoc_singleton[symmetric]
hoelzl@40859
   602
    using finite_distribution_order(6)[OF finite_var(2) ZX]
hoelzl@40859
   603
    by (auto simp: space_simps)
hoelzl@36624
   604
hoelzl@40859
   605
  have "(\<Sum>(x, y, z)\<in>?S. ?XYZ x y z * log b (?XYZ x y z / (?XZ x z * ?YZdZ y z))) =
hoelzl@40859
   606
    (\<Sum>(x, y, z)\<in>?S. ?XYZ x y z * (log b (?XYZ x y z / (?X x * ?YZ y z)) - log b (?XZ x z / (?X x * ?Z z))))"
hoelzl@40859
   607
    (is "(\<Sum>(x, y, z)\<in>?S. ?L x y z) = (\<Sum>(x, y, z)\<in>?S. ?R x y z)")
hoelzl@40859
   608
  proof (safe intro!: setsum_cong)
hoelzl@40859
   609
    fix x y z assume space: "x \<in> space MX" "y \<in> space MY" "z \<in> space MZ"
hoelzl@40859
   610
    then have *: "?XYZ x y z / (?XZ x z * ?YZdZ y z) =
hoelzl@40859
   611
      (?XYZ x y z / (?X x * ?YZ y z)) / (?XZ x z / (?X x * ?Z z))"
hoelzl@40859
   612
      using order1(3)
hoelzl@40859
   613
      by (auto simp: real_of_pinfreal_mult[symmetric] real_of_pinfreal_eq_0)
hoelzl@40859
   614
    show "?L x y z = ?R x y z"
hoelzl@40859
   615
    proof cases
hoelzl@40859
   616
      assume "?XYZ x y z \<noteq> 0"
hoelzl@40859
   617
      with space b_gt_1 order1 order2 show ?thesis unfolding *
hoelzl@40859
   618
        by (subst log_divide)
hoelzl@40859
   619
           (auto simp: zero_less_divide_iff zero_less_real_of_pinfreal
hoelzl@40859
   620
                       real_of_pinfreal_eq_0 zero_less_mult_iff)
hoelzl@40859
   621
    qed simp
hoelzl@40859
   622
  qed
hoelzl@40859
   623
  also have "\<dots> = (\<Sum>(x, y, z)\<in>?S. ?XYZ x y z * log b (?XYZ x y z / (?X x * ?YZ y z))) -
hoelzl@40859
   624
                  (\<Sum>(x, y, z)\<in>?S. ?XYZ x y z * log b (?XZ x z / (?X x * ?Z z)))"
hoelzl@40859
   625
    by (auto simp add: setsum_subtractf[symmetric] field_simps intro!: setsum_cong)
hoelzl@40859
   626
  also have "(\<Sum>(x, y, z)\<in>?S. ?XYZ x y z * log b (?XZ x z / (?X x * ?Z z))) =
hoelzl@40859
   627
             (\<Sum>(x, z)\<in>space MX \<times> space MZ. ?XZ x z * log b (?XZ x z / (?X x * ?Z z)))"
hoelzl@40859
   628
    unfolding setsum_cartesian_product[symmetric] setsum_commute[of _ _ "space MY"]
hoelzl@40859
   629
              setsum_left_distrib[symmetric]
hoelzl@40859
   630
    unfolding joint_distribution_commute_singleton[of X]
hoelzl@40859
   631
    unfolding joint_distribution_assoc_singleton[symmetric]
hoelzl@40859
   632
    using setsum_real_joint_distribution_singleton[OF finite_var(2) ZX, unfolded space_simps]
hoelzl@40859
   633
    by (intro setsum_cong refl) simp
hoelzl@40859
   634
  also have "(\<Sum>(x, y, z)\<in>?S. ?XYZ x y z * log b (?XYZ x y z / (?X x * ?YZ y z))) -
hoelzl@40859
   635
             (\<Sum>(x, z)\<in>space MX \<times> space MZ. ?XZ x z * log b (?XZ x z / (?X x * ?Z z))) =
hoelzl@40859
   636
             conditional_mutual_information b MX MY MZ X Y Z"
hoelzl@40859
   637
    unfolding conditional_mutual_information_def
hoelzl@40859
   638
    unfolding mutual_information_generic_eq[OF finite_var(1,3)]
hoelzl@40859
   639
    unfolding mutual_information_generic_eq[OF finite_var(1) YZ]
hoelzl@40859
   640
    by (simp add: space_sigma space_pair_algebra setsum_cartesian_product')
hoelzl@40859
   641
  finally show ?thesis by simp
hoelzl@40859
   642
qed
hoelzl@40859
   643
hoelzl@40859
   644
lemma (in information_space) conditional_mutual_information_eq:
hoelzl@40859
   645
  assumes "simple_function X" "simple_function Y" "simple_function Z"
hoelzl@40859
   646
  shows "\<I>(X;Y|Z) = (\<Sum>(x, y, z) \<in> X`space M \<times> Y`space M \<times> Z`space M.
hoelzl@40859
   647
             real (distribution (\<lambda>x. (X x, Y x, Z x)) {(x, y, z)}) *
hoelzl@40859
   648
             log b (real (distribution (\<lambda>x. (X x, Y x, Z x)) {(x, y, z)}) /
hoelzl@40859
   649
    (real (joint_distribution X Z {(x, z)}) * real (joint_distribution Y Z {(y,z)} / distribution Z {z}))))"
hoelzl@40859
   650
  using conditional_mutual_information_generic_eq[OF assms[THEN simple_function_imp_finite_random_variable]]
hoelzl@40859
   651
  by simp
hoelzl@40859
   652
hoelzl@40859
   653
lemma (in information_space) conditional_mutual_information_eq_mutual_information:
hoelzl@40859
   654
  assumes X: "simple_function X" and Y: "simple_function Y"
hoelzl@40859
   655
  shows "\<I>(X ; Y) = \<I>(X ; Y | (\<lambda>x. ()))"
hoelzl@36624
   656
proof -
hoelzl@36624
   657
  have [simp]: "(\<lambda>x. ()) ` space M = {()}" using not_empty by auto
hoelzl@40859
   658
  have C: "simple_function (\<lambda>x. ())" by auto
hoelzl@36624
   659
  show ?thesis
hoelzl@40859
   660
    unfolding conditional_mutual_information_eq[OF X Y C]
hoelzl@40859
   661
    unfolding mutual_information_eq[OF X Y]
hoelzl@36624
   662
    by (simp add: setsum_cartesian_product' distribution_remove_const)
hoelzl@36624
   663
qed
hoelzl@36624
   664
hoelzl@40859
   665
lemma (in prob_space) distribution_unit[simp]: "distribution (\<lambda>x. ()) {()} = 1"
hoelzl@40859
   666
  unfolding distribution_def using measure_space_1 by auto
hoelzl@40859
   667
hoelzl@40859
   668
lemma (in prob_space) joint_distribution_unit[simp]: "distribution (\<lambda>x. (X x, ())) {(a, ())} = distribution X {a}"
hoelzl@40859
   669
  unfolding distribution_def by (auto intro!: arg_cong[where f=\<mu>])
hoelzl@40859
   670
hoelzl@40859
   671
lemma (in prob_space) setsum_distribution:
hoelzl@40859
   672
  assumes X: "finite_random_variable MX X" shows "(\<Sum>a\<in>space MX. distribution X {a}) = 1"
hoelzl@40859
   673
  using setsum_joint_distribution[OF assms, of "\<lparr> space = UNIV, sets = Pow UNIV \<rparr>" "\<lambda>x. ()" "{()}"]
hoelzl@40859
   674
  using sigma_algebra_Pow[of "UNIV::unit set"] by simp
hoelzl@40859
   675
hoelzl@40859
   676
lemma (in prob_space) setsum_real_distribution:
hoelzl@40859
   677
  assumes X: "finite_random_variable MX X" shows "(\<Sum>a\<in>space MX. real (distribution X {a})) = 1"
hoelzl@40859
   678
  using setsum_real_joint_distribution[OF assms, of "\<lparr> space = UNIV, sets = Pow UNIV \<rparr>" "\<lambda>x. ()" "{()}"]
hoelzl@40859
   679
  using sigma_algebra_Pow[of "UNIV::unit set"] by simp
hoelzl@40859
   680
hoelzl@40859
   681
lemma (in information_space) conditional_mutual_information_generic_positive:
hoelzl@40859
   682
  assumes "finite_random_variable MX X" and "finite_random_variable MY Y" and "finite_random_variable MZ Z"
hoelzl@40859
   683
  shows "0 \<le> conditional_mutual_information b MX MY MZ X Y Z"
hoelzl@40859
   684
proof (cases "space MX \<times> space MY \<times> space MZ = {}")
hoelzl@40859
   685
  case True show ?thesis
hoelzl@40859
   686
    unfolding conditional_mutual_information_generic_eq[OF assms] True
hoelzl@40859
   687
    by simp
hoelzl@40859
   688
next
hoelzl@40859
   689
  case False
hoelzl@38656
   690
  let "?dXYZ A" = "real (distribution (\<lambda>x. (X x, Y x, Z x)) A)"
hoelzl@38656
   691
  let "?dXZ A" = "real (joint_distribution X Z A)"
hoelzl@38656
   692
  let "?dYZ A" = "real (joint_distribution Y Z A)"
hoelzl@38656
   693
  let "?dX A" = "real (distribution X A)"
hoelzl@38656
   694
  let "?dZ A" = "real (distribution Z A)"
hoelzl@40859
   695
  let ?M = "space MX \<times> space MY \<times> space MZ"
hoelzl@36624
   696
nipkow@39302
   697
  have split_beta: "\<And>f. split f = (\<lambda>x. f (fst x) (snd x))" by (simp add: fun_eq_iff)
hoelzl@36080
   698
hoelzl@40859
   699
  note space_simps = space_pair_algebra space_sigma algebra.simps
hoelzl@40859
   700
hoelzl@40859
   701
  note finite_var = assms
hoelzl@40859
   702
  note YZ = finite_random_variable_pairI[OF finite_var(2,3)]
hoelzl@40859
   703
  note XZ = finite_random_variable_pairI[OF finite_var(1,3)]
hoelzl@40859
   704
  note ZX = finite_random_variable_pairI[OF finite_var(3,1)]
hoelzl@40859
   705
  note YZ = finite_random_variable_pairI[OF finite_var(2,3)]
hoelzl@40859
   706
  note XYZ = finite_random_variable_pairI[OF finite_var(1) YZ]
hoelzl@40859
   707
  note finite = finite_var(3) YZ XZ XYZ
hoelzl@40859
   708
  note finite = finite[THEN finite_distribution_finite, simplified space_simps]
hoelzl@40859
   709
hoelzl@40859
   710
  have order: "\<And>x y z. \<lbrakk>x \<in> space MX; y \<in> space MY; z \<in> space MZ; joint_distribution X Z {(x, z)} = 0\<rbrakk>
hoelzl@40859
   711
          \<Longrightarrow> joint_distribution X (\<lambda>x. (Y x, Z x)) {(x, y, z)} = 0"
hoelzl@40859
   712
    unfolding joint_distribution_commute_singleton[of X]
hoelzl@40859
   713
    unfolding joint_distribution_assoc_singleton[symmetric]
hoelzl@40859
   714
    using finite_distribution_order(6)[OF finite_var(2) ZX]
hoelzl@40859
   715
    by (auto simp: space_simps)
hoelzl@40859
   716
hoelzl@40859
   717
  note order = order
hoelzl@40859
   718
    finite_distribution_order(5,6)[OF finite_var(1) YZ, simplified space_simps]
hoelzl@40859
   719
    finite_distribution_order(5,6)[OF finite_var(2,3), simplified space_simps]
hoelzl@40859
   720
hoelzl@40859
   721
  have "- conditional_mutual_information b MX MY MZ X Y Z = - (\<Sum>(x, y, z) \<in> ?M. ?dXYZ {(x, y, z)} *
hoelzl@40859
   722
    log b (?dXYZ {(x, y, z)} / (?dXZ {(x, z)} * ?dYZ {(y,z)} / ?dZ {z})))"
hoelzl@40859
   723
    unfolding conditional_mutual_information_generic_eq[OF assms] neg_equal_iff_equal
hoelzl@40859
   724
    by (intro setsum_cong) (auto intro!: arg_cong[where f="log b"] simp: real_of_pinfreal_mult[symmetric])
hoelzl@40859
   725
  also have "\<dots> \<le> log b (\<Sum>(x, y, z) \<in> ?M. ?dXZ {(x, z)} * ?dYZ {(y,z)} / ?dZ {z})"
hoelzl@36624
   726
    unfolding split_beta
hoelzl@36624
   727
  proof (rule log_setsum_divide)
hoelzl@40859
   728
    show "?M \<noteq> {}" using False by simp
hoelzl@36624
   729
    show "1 < b" using b_gt_1 .
hoelzl@36080
   730
hoelzl@40859
   731
    show "finite ?M" using assms
hoelzl@40859
   732
      unfolding finite_sigma_algebra_def finite_sigma_algebra_axioms_def by auto
hoelzl@40859
   733
hoelzl@40859
   734
    show "(\<Sum>x\<in>?M. ?dXYZ {(fst x, fst (snd x), snd (snd x))}) = 1"
hoelzl@40859
   735
      unfolding setsum_cartesian_product'
hoelzl@40859
   736
      unfolding setsum_commute[of _ "space MY"]
hoelzl@40859
   737
      unfolding setsum_commute[of _ "space MZ"]
hoelzl@40859
   738
      by (simp_all add: space_pair_algebra
hoelzl@40859
   739
        setsum_real_joint_distribution_singleton[OF `finite_random_variable MX X` YZ]
hoelzl@40859
   740
        setsum_real_joint_distribution_singleton[OF `finite_random_variable MY Y` finite_var(3)]
hoelzl@40859
   741
        setsum_real_distribution[OF `finite_random_variable MZ Z`])
hoelzl@40859
   742
hoelzl@36624
   743
    fix x assume "x \<in> ?M"
hoelzl@38656
   744
    let ?x = "(fst x, fst (snd x), snd (snd x))"
hoelzl@38656
   745
hoelzl@38656
   746
    show "0 \<le> ?dXYZ {?x}" using real_pinfreal_nonneg .
hoelzl@36624
   747
    show "0 \<le> ?dXZ {(fst x, snd (snd x))} * ?dYZ {(fst (snd x), snd (snd x))} / ?dZ {snd (snd x)}"
hoelzl@38656
   748
     by (simp add: real_pinfreal_nonneg mult_nonneg_nonneg divide_nonneg_nonneg)
hoelzl@36080
   749
hoelzl@38656
   750
    assume *: "0 < ?dXYZ {?x}"
hoelzl@40859
   751
    with `x \<in> ?M` show "0 < ?dXZ {(fst x, snd (snd x))} * ?dYZ {(fst (snd x), snd (snd x))} / ?dZ {snd (snd x)}"
hoelzl@40859
   752
      using finite order
hoelzl@40859
   753
      by (cases x)
hoelzl@40859
   754
         (auto simp add: zero_less_real_of_pinfreal zero_less_mult_iff zero_less_divide_iff)
hoelzl@40859
   755
  qed
hoelzl@40859
   756
  also have "(\<Sum>(x, y, z) \<in> ?M. ?dXZ {(x, z)} * ?dYZ {(y,z)} / ?dZ {z}) = (\<Sum>z\<in>space MZ. ?dZ {z})"
hoelzl@36624
   757
    apply (simp add: setsum_cartesian_product')
hoelzl@36624
   758
    apply (subst setsum_commute)
hoelzl@36624
   759
    apply (subst (2) setsum_commute)
hoelzl@40859
   760
    by (auto simp: setsum_divide_distrib[symmetric] setsum_product[symmetric]
hoelzl@40859
   761
                   setsum_real_joint_distribution_singleton[OF finite_var(1,3)]
hoelzl@40859
   762
                   setsum_real_joint_distribution_singleton[OF finite_var(2,3)]
hoelzl@36624
   763
          intro!: setsum_cong)
hoelzl@40859
   764
  also have "log b (\<Sum>z\<in>space MZ. ?dZ {z}) = 0"
hoelzl@40859
   765
    unfolding setsum_real_distribution[OF finite_var(3)] by simp
hoelzl@40859
   766
  finally show ?thesis by simp
hoelzl@36080
   767
qed
hoelzl@36080
   768
hoelzl@40859
   769
lemma (in information_space) conditional_mutual_information_positive:
hoelzl@40859
   770
  assumes "simple_function X" and "simple_function Y" and "simple_function Z"
hoelzl@40859
   771
  shows "0 \<le> \<I>(X;Y|Z)"
hoelzl@40859
   772
  using conditional_mutual_information_generic_positive[OF assms[THEN simple_function_imp_finite_random_variable]]
hoelzl@40859
   773
  by simp
hoelzl@40859
   774
hoelzl@39097
   775
subsection {* Conditional Entropy *}
hoelzl@39097
   776
hoelzl@36080
   777
definition (in prob_space)
hoelzl@36080
   778
  "conditional_entropy b S T X Y = conditional_mutual_information b S S T X X Y"
hoelzl@36080
   779
hoelzl@40859
   780
abbreviation (in information_space)
hoelzl@40859
   781
  conditional_entropy_Pow ("\<H>'(_ | _')") where
hoelzl@36624
   782
  "\<H>(X | Y) \<equiv> conditional_entropy b
hoelzl@36080
   783
    \<lparr> space = X`space M, sets = Pow (X`space M) \<rparr>
hoelzl@36080
   784
    \<lparr> space = Y`space M, sets = Pow (Y`space M) \<rparr> X Y"
hoelzl@36080
   785
hoelzl@40859
   786
lemma (in information_space) conditional_entropy_positive:
hoelzl@40859
   787
  "simple_function X \<Longrightarrow> simple_function Y \<Longrightarrow> 0 \<le> \<H>(X | Y)"
hoelzl@40859
   788
  unfolding conditional_entropy_def by (auto intro!: conditional_mutual_information_positive)
hoelzl@36080
   789
hoelzl@40859
   790
lemma (in measure_space) empty_measureI: "A = {} \<Longrightarrow> \<mu> A = 0" by simp
hoelzl@40859
   791
hoelzl@40859
   792
lemma (in information_space) conditional_entropy_generic_eq:
hoelzl@40859
   793
  assumes MX: "finite_random_variable MX X"
hoelzl@40859
   794
  assumes MZ: "finite_random_variable MZ Z"
hoelzl@39097
   795
  shows "conditional_entropy b MX MZ X Z =
hoelzl@39097
   796
     - (\<Sum>(x, z)\<in>space MX \<times> space MZ.
hoelzl@39097
   797
         real (joint_distribution X Z {(x, z)}) *
hoelzl@39097
   798
         log b (real (joint_distribution X Z {(x, z)}) / real (distribution Z {z})))"
hoelzl@40859
   799
proof -
hoelzl@40859
   800
  interpret MX: finite_sigma_algebra MX using MX by simp
hoelzl@40859
   801
  interpret MZ: finite_sigma_algebra MZ using MZ by simp
hoelzl@40859
   802
  let "?XXZ x y z" = "joint_distribution X (\<lambda>x. (X x, Z x)) {(x, y, z)}"
hoelzl@40859
   803
  let "?XZ x z" = "joint_distribution X Z {(x, z)}"
hoelzl@40859
   804
  let "?Z z" = "distribution Z {z}"
hoelzl@40859
   805
  let "?f x y z" = "log b (real (?XXZ x y z) / (real (?XZ x z) * real (?XZ y z / ?Z z)))"
hoelzl@40859
   806
  { fix x z have "?XXZ x x z = ?XZ x z"
hoelzl@40859
   807
      unfolding distribution_def by (auto intro!: arg_cong[where f=\<mu>]) }
hoelzl@40859
   808
  note this[simp]
hoelzl@40859
   809
  { fix x x' :: 'b and z assume "x' \<noteq> x"
hoelzl@40859
   810
    then have "?XXZ x x' z = 0"
hoelzl@40859
   811
      by (auto simp: distribution_def intro!: arg_cong[where f=\<mu>] empty_measureI) }
hoelzl@40859
   812
  note this[simp]
hoelzl@40859
   813
  { fix x x' z assume *: "x \<in> space MX" "z \<in> space MZ"
hoelzl@40859
   814
    then have "(\<Sum>x'\<in>space MX. real (?XXZ x x' z) * ?f x x' z)
hoelzl@40859
   815
      = (\<Sum>x'\<in>space MX. if x = x' then real (?XZ x z) * ?f x x z else 0)"
hoelzl@40859
   816
      by (auto intro!: setsum_cong)
hoelzl@40859
   817
    also have "\<dots> = real (?XZ x z) * ?f x x z"
hoelzl@40859
   818
      using `x \<in> space MX` by (simp add: setsum_cases[OF MX.finite_space])
hoelzl@40859
   819
    also have "\<dots> = real (?XZ x z) * log b (real (?Z z) / real (?XZ x z))"
hoelzl@40859
   820
      by (auto simp: real_of_pinfreal_mult[symmetric])
hoelzl@40859
   821
    also have "\<dots> = - real (?XZ x z) * log b (real (?XZ x z) / real (?Z z))"
hoelzl@40859
   822
      using assms[THEN finite_distribution_finite]
hoelzl@40859
   823
      using finite_distribution_order(6)[OF MX MZ]
hoelzl@40859
   824
      by (auto simp: log_simps field_simps zero_less_mult_iff zero_less_real_of_pinfreal real_of_pinfreal_eq_0)
hoelzl@40859
   825
    finally have "(\<Sum>x'\<in>space MX. real (?XXZ x x' z) * ?f x x' z) =
hoelzl@40859
   826
      - real (?XZ x z) * log b (real (?XZ x z) / real (?Z z))" . }
hoelzl@40859
   827
  note * = this
hoelzl@40859
   828
hoelzl@40859
   829
  show ?thesis
hoelzl@40859
   830
    unfolding conditional_entropy_def
hoelzl@40859
   831
    unfolding conditional_mutual_information_generic_eq[OF MX MX MZ]
hoelzl@40859
   832
    by (auto simp: setsum_cartesian_product' setsum_negf[symmetric]
hoelzl@40859
   833
                   setsum_commute[of _ "space MZ"] *   simp del: divide_pinfreal_def
hoelzl@40859
   834
             intro!: setsum_cong)
hoelzl@39097
   835
qed
hoelzl@39097
   836
hoelzl@40859
   837
lemma (in information_space) conditional_entropy_eq:
hoelzl@40859
   838
  assumes "simple_function X" "simple_function Z"
hoelzl@40859
   839
  shows "\<H>(X | Z) =
hoelzl@36080
   840
     - (\<Sum>(x, z)\<in>X ` space M \<times> Z ` space M.
hoelzl@38656
   841
         real (joint_distribution X Z {(x, z)}) *
hoelzl@38656
   842
         log b (real (joint_distribution X Z {(x, z)}) / real (distribution Z {z})))"
hoelzl@40859
   843
  using conditional_entropy_generic_eq[OF assms[THEN simple_function_imp_finite_random_variable]]
hoelzl@40859
   844
  by simp
hoelzl@39097
   845
hoelzl@40859
   846
lemma (in information_space) conditional_entropy_eq_ce_with_hypothesis:
hoelzl@40859
   847
  assumes X: "simple_function X" and Y: "simple_function Y"
hoelzl@40859
   848
  shows "\<H>(X | Y) =
hoelzl@39097
   849
    -(\<Sum>y\<in>Y`space M. real (distribution Y {y}) *
hoelzl@39097
   850
      (\<Sum>x\<in>X`space M. real (joint_distribution X Y {(x,y)}) / real (distribution Y {(y)}) *
hoelzl@39097
   851
              log b (real (joint_distribution X Y {(x,y)}) / real (distribution Y {(y)}))))"
hoelzl@40859
   852
  unfolding conditional_entropy_eq[OF assms]
hoelzl@40859
   853
  using finite_distribution_finite[OF finite_random_variable_pairI[OF assms[THEN simple_function_imp_finite_random_variable]]]
hoelzl@40859
   854
  using finite_distribution_order(5,6)[OF assms[THEN simple_function_imp_finite_random_variable]]
hoelzl@40859
   855
  using finite_distribution_finite[OF Y[THEN simple_function_imp_finite_random_variable]]
hoelzl@40859
   856
  by (auto simp: setsum_cartesian_product'  setsum_commute[of _ "Y`space M"] setsum_right_distrib real_of_pinfreal_eq_0
hoelzl@40859
   857
           intro!: setsum_cong)
hoelzl@39097
   858
hoelzl@40859
   859
lemma (in information_space) conditional_entropy_eq_cartesian_product:
hoelzl@40859
   860
  assumes "simple_function X" "simple_function Y"
hoelzl@40859
   861
  shows "\<H>(X | Y) = -(\<Sum>x\<in>X`space M. \<Sum>y\<in>Y`space M.
hoelzl@39097
   862
    real (joint_distribution X Y {(x,y)}) *
hoelzl@39097
   863
    log b (real (joint_distribution X Y {(x,y)}) / real (distribution Y {y})))"
hoelzl@40859
   864
  unfolding conditional_entropy_eq[OF assms]
hoelzl@40859
   865
  by (auto intro!: setsum_cong simp: setsum_cartesian_product')
hoelzl@36080
   866
hoelzl@39097
   867
subsection {* Equalities *}
hoelzl@39097
   868
hoelzl@40859
   869
lemma (in information_space) mutual_information_eq_entropy_conditional_entropy:
hoelzl@40859
   870
  assumes X: "simple_function X" and Z: "simple_function Z"
hoelzl@40859
   871
  shows  "\<I>(X ; Z) = \<H>(X) - \<H>(X | Z)"
hoelzl@40859
   872
proof -
hoelzl@40859
   873
  let "?XZ x z" = "real (joint_distribution X Z {(x, z)})"
hoelzl@40859
   874
  let "?Z z" = "real (distribution Z {z})"
hoelzl@40859
   875
  let "?X x" = "real (distribution X {x})"
hoelzl@40859
   876
  note fX = X[THEN simple_function_imp_finite_random_variable]
hoelzl@40859
   877
  note fZ = Z[THEN simple_function_imp_finite_random_variable]
hoelzl@40859
   878
  note fX[THEN finite_distribution_finite, simp] and fZ[THEN finite_distribution_finite, simp]
hoelzl@40859
   879
  note finite_distribution_order[OF fX fZ, simp]
hoelzl@40859
   880
  { fix x z assume "x \<in> X`space M" "z \<in> Z`space M"
hoelzl@40859
   881
    have "?XZ x z * log b (?XZ x z / (?X x * ?Z z)) =
hoelzl@40859
   882
          ?XZ x z * log b (?XZ x z / ?Z z) - ?XZ x z * log b (?X x)"
hoelzl@40859
   883
      by (auto simp: log_simps real_of_pinfreal_mult[symmetric] zero_less_mult_iff
hoelzl@40859
   884
                     zero_less_real_of_pinfreal field_simps real_of_pinfreal_eq_0 abs_mult) }
hoelzl@40859
   885
  note * = this
hoelzl@40859
   886
  show ?thesis
hoelzl@40859
   887
    unfolding entropy_eq[OF X] conditional_entropy_eq[OF X Z] mutual_information_eq[OF X Z]
hoelzl@40859
   888
    using setsum_real_joint_distribution_singleton[OF fZ fX, unfolded joint_distribution_commute_singleton[of Z X]]
hoelzl@40859
   889
    by (simp add: * setsum_cartesian_product' setsum_subtractf setsum_left_distrib[symmetric]
hoelzl@40859
   890
                     setsum_real_distribution)
hoelzl@40859
   891
qed
hoelzl@36080
   892
hoelzl@40859
   893
lemma (in information_space) conditional_entropy_less_eq_entropy:
hoelzl@40859
   894
  assumes X: "simple_function X" and Z: "simple_function Z"
hoelzl@40859
   895
  shows "\<H>(X | Z) \<le> \<H>(X)"
hoelzl@36624
   896
proof -
hoelzl@40859
   897
  have "\<I>(X ; Z) = \<H>(X) - \<H>(X | Z)" using mutual_information_eq_entropy_conditional_entropy[OF assms] .
hoelzl@40859
   898
  with mutual_information_positive[OF X Z] entropy_positive[OF X]
hoelzl@36624
   899
  show ?thesis by auto
hoelzl@36080
   900
qed
hoelzl@36080
   901
hoelzl@40859
   902
lemma (in information_space) entropy_chain_rule:
hoelzl@40859
   903
  assumes X: "simple_function X" and Y: "simple_function Y"
hoelzl@40859
   904
  shows  "\<H>(\<lambda>x. (X x, Y x)) = \<H>(X) + \<H>(Y|X)"
hoelzl@40859
   905
proof -
hoelzl@40859
   906
  let "?XY x y" = "real (joint_distribution X Y {(x, y)})"
hoelzl@40859
   907
  let "?Y y" = "real (distribution Y {y})"
hoelzl@40859
   908
  let "?X x" = "real (distribution X {x})"
hoelzl@40859
   909
  note fX = X[THEN simple_function_imp_finite_random_variable]
hoelzl@40859
   910
  note fY = Y[THEN simple_function_imp_finite_random_variable]
hoelzl@40859
   911
  note fX[THEN finite_distribution_finite, simp] and fY[THEN finite_distribution_finite, simp]
hoelzl@40859
   912
  note finite_distribution_order[OF fX fY, simp]
hoelzl@40859
   913
  { fix x y assume "x \<in> X`space M" "y \<in> Y`space M"
hoelzl@40859
   914
    have "?XY x y * log b (?XY x y / ?X x) =
hoelzl@40859
   915
          ?XY x y * log b (?XY x y) - ?XY x y * log b (?X x)"
hoelzl@40859
   916
      by (auto simp: log_simps real_of_pinfreal_mult[symmetric] zero_less_mult_iff
hoelzl@40859
   917
                     zero_less_real_of_pinfreal field_simps real_of_pinfreal_eq_0 abs_mult) }
hoelzl@40859
   918
  note * = this
hoelzl@40859
   919
  show ?thesis
hoelzl@40859
   920
    using setsum_real_joint_distribution_singleton[OF fY fX]
hoelzl@40859
   921
    unfolding entropy_eq[OF X] conditional_entropy_eq_cartesian_product[OF Y X] entropy_eq_cartesian_product[OF X Y]
hoelzl@40859
   922
    unfolding joint_distribution_commute_singleton[of Y X] setsum_commute[of _ "X`space M"]
hoelzl@40859
   923
    by (simp add: * setsum_subtractf setsum_left_distrib[symmetric])
hoelzl@40859
   924
qed
hoelzl@38656
   925
hoelzl@39097
   926
section {* Partitioning *}
hoelzl@36080
   927
hoelzl@36624
   928
definition "subvimage A f g \<longleftrightarrow> (\<forall>x \<in> A. f -` {f x} \<inter> A \<subseteq> g -` {g x} \<inter> A)"
hoelzl@36624
   929
hoelzl@36624
   930
lemma subvimageI:
hoelzl@36624
   931
  assumes "\<And>x y. \<lbrakk> x \<in> A ; y \<in> A ; f x = f y \<rbrakk> \<Longrightarrow> g x = g y"
hoelzl@36624
   932
  shows "subvimage A f g"
hoelzl@36624
   933
  using assms unfolding subvimage_def by blast
hoelzl@36624
   934
hoelzl@36624
   935
lemma subvimageE[consumes 1]:
hoelzl@36624
   936
  assumes "subvimage A f g"
hoelzl@36624
   937
  obtains "\<And>x y. \<lbrakk> x \<in> A ; y \<in> A ; f x = f y \<rbrakk> \<Longrightarrow> g x = g y"
hoelzl@36624
   938
  using assms unfolding subvimage_def by blast
hoelzl@36624
   939
hoelzl@36624
   940
lemma subvimageD:
hoelzl@36624
   941
  "\<lbrakk> subvimage A f g ; x \<in> A ; y \<in> A ; f x = f y \<rbrakk> \<Longrightarrow> g x = g y"
hoelzl@36624
   942
  using assms unfolding subvimage_def by blast
hoelzl@36624
   943
hoelzl@36624
   944
lemma subvimage_subset:
hoelzl@36624
   945
  "\<lbrakk> subvimage B f g ; A \<subseteq> B \<rbrakk> \<Longrightarrow> subvimage A f g"
hoelzl@36624
   946
  unfolding subvimage_def by auto
hoelzl@36624
   947
hoelzl@36624
   948
lemma subvimage_idem[intro]: "subvimage A g g"
hoelzl@36624
   949
  by (safe intro!: subvimageI)
hoelzl@36624
   950
hoelzl@36624
   951
lemma subvimage_comp_finer[intro]:
hoelzl@36624
   952
  assumes svi: "subvimage A g h"
hoelzl@36624
   953
  shows "subvimage A g (f \<circ> h)"
hoelzl@36624
   954
proof (rule subvimageI, simp)
hoelzl@36624
   955
  fix x y assume "x \<in> A" "y \<in> A" "g x = g y"
hoelzl@36624
   956
  from svi[THEN subvimageD, OF this]
hoelzl@36624
   957
  show "f (h x) = f (h y)" by simp
hoelzl@36624
   958
qed
hoelzl@36624
   959
hoelzl@36624
   960
lemma subvimage_comp_gran:
hoelzl@36624
   961
  assumes svi: "subvimage A g h"
hoelzl@36624
   962
  assumes inj: "inj_on f (g ` A)"
hoelzl@36624
   963
  shows "subvimage A (f \<circ> g) h"
hoelzl@36624
   964
  by (rule subvimageI) (auto intro!: subvimageD[OF svi] simp: inj_on_iff[OF inj])
hoelzl@36624
   965
hoelzl@36624
   966
lemma subvimage_comp:
hoelzl@36624
   967
  assumes svi: "subvimage (f ` A) g h"
hoelzl@36624
   968
  shows "subvimage A (g \<circ> f) (h \<circ> f)"
hoelzl@36624
   969
  by (rule subvimageI) (auto intro!: svi[THEN subvimageD])
hoelzl@36624
   970
hoelzl@36624
   971
lemma subvimage_trans:
hoelzl@36624
   972
  assumes fg: "subvimage A f g"
hoelzl@36624
   973
  assumes gh: "subvimage A g h"
hoelzl@36624
   974
  shows "subvimage A f h"
hoelzl@36624
   975
  by (rule subvimageI) (auto intro!: fg[THEN subvimageD] gh[THEN subvimageD])
hoelzl@36624
   976
hoelzl@36624
   977
lemma subvimage_translator:
hoelzl@36624
   978
  assumes svi: "subvimage A f g"
hoelzl@36624
   979
  shows "\<exists>h. \<forall>x \<in> A. h (f x)  = g x"
hoelzl@36624
   980
proof (safe intro!: exI[of _ "\<lambda>x. (THE z. z \<in> (g ` (f -` {x} \<inter> A)))"])
hoelzl@36624
   981
  fix x assume "x \<in> A"
hoelzl@36624
   982
  show "(THE x'. x' \<in> (g ` (f -` {f x} \<inter> A))) = g x"
hoelzl@36624
   983
    by (rule theI2[of _ "g x"])
hoelzl@36624
   984
      (insert `x \<in> A`, auto intro!: svi[THEN subvimageD])
hoelzl@36624
   985
qed
hoelzl@36624
   986
hoelzl@36624
   987
lemma subvimage_translator_image:
hoelzl@36624
   988
  assumes svi: "subvimage A f g"
hoelzl@36624
   989
  shows "\<exists>h. h ` f ` A = g ` A"
hoelzl@36624
   990
proof -
hoelzl@36624
   991
  from subvimage_translator[OF svi]
hoelzl@36624
   992
  obtain h where "\<And>x. x \<in> A \<Longrightarrow> h (f x) = g x" by auto
hoelzl@36624
   993
  thus ?thesis
hoelzl@36624
   994
    by (auto intro!: exI[of _ h]
hoelzl@36624
   995
      simp: image_compose[symmetric] comp_def cong: image_cong)
hoelzl@36624
   996
qed
hoelzl@36624
   997
hoelzl@36624
   998
lemma subvimage_finite:
hoelzl@36624
   999
  assumes svi: "subvimage A f g" and fin: "finite (f`A)"
hoelzl@36624
  1000
  shows "finite (g`A)"
hoelzl@36624
  1001
proof -
hoelzl@36624
  1002
  from subvimage_translator_image[OF svi]
hoelzl@36624
  1003
  obtain h where "g`A = h`f`A" by fastsimp
hoelzl@36624
  1004
  with fin show "finite (g`A)" by simp
hoelzl@36624
  1005
qed
hoelzl@36624
  1006
hoelzl@36624
  1007
lemma subvimage_disj:
hoelzl@36624
  1008
  assumes svi: "subvimage A f g"
hoelzl@36624
  1009
  shows "f -` {x} \<inter> A \<subseteq> g -` {y} \<inter> A \<or>
hoelzl@36624
  1010
      f -` {x} \<inter> g -` {y} \<inter> A = {}" (is "?sub \<or> ?dist")
hoelzl@36624
  1011
proof (rule disjCI)
hoelzl@36624
  1012
  assume "\<not> ?dist"
hoelzl@36624
  1013
  then obtain z where "z \<in> A" and "x = f z" and "y = g z" by auto
hoelzl@36624
  1014
  thus "?sub" using svi unfolding subvimage_def by auto
hoelzl@36624
  1015
qed
hoelzl@36624
  1016
hoelzl@36624
  1017
lemma setsum_image_split:
hoelzl@36624
  1018
  assumes svi: "subvimage A f g" and fin: "finite (f ` A)"
hoelzl@36624
  1019
  shows "(\<Sum>x\<in>f`A. h x) = (\<Sum>y\<in>g`A. \<Sum>x\<in>f`(g -` {y} \<inter> A). h x)"
hoelzl@36624
  1020
    (is "?lhs = ?rhs")
hoelzl@36624
  1021
proof -
hoelzl@36624
  1022
  have "f ` A =
hoelzl@36624
  1023
      snd ` (SIGMA x : g ` A. f ` (g -` {x} \<inter> A))"
hoelzl@36624
  1024
      (is "_ = snd ` ?SIGMA")
hoelzl@36624
  1025
    unfolding image_split_eq_Sigma[symmetric]
hoelzl@36624
  1026
    by (simp add: image_compose[symmetric] comp_def)
hoelzl@36624
  1027
  moreover
hoelzl@36624
  1028
  have snd_inj: "inj_on snd ?SIGMA"
hoelzl@36624
  1029
    unfolding image_split_eq_Sigma[symmetric]
hoelzl@36624
  1030
    by (auto intro!: inj_onI subvimageD[OF svi])
hoelzl@36624
  1031
  ultimately
hoelzl@36624
  1032
  have "(\<Sum>x\<in>f`A. h x) = (\<Sum>(x,y)\<in>?SIGMA. h y)"
hoelzl@36624
  1033
    by (auto simp: setsum_reindex intro: setsum_cong)
hoelzl@36624
  1034
  also have "... = ?rhs"
hoelzl@36624
  1035
    using subvimage_finite[OF svi fin] fin
hoelzl@36624
  1036
    apply (subst setsum_Sigma[symmetric])
hoelzl@36624
  1037
    by (auto intro!: finite_subset[of _ "f`A"])
hoelzl@36624
  1038
  finally show ?thesis .
hoelzl@36624
  1039
qed
hoelzl@36624
  1040
hoelzl@40859
  1041
lemma (in information_space) entropy_partition:
hoelzl@40859
  1042
  assumes sf: "simple_function X" "simple_function P"
hoelzl@36624
  1043
  assumes svi: "subvimage (space M) X P"
hoelzl@36624
  1044
  shows "\<H>(X) = \<H>(P) + \<H>(X|P)"
hoelzl@36624
  1045
proof -
hoelzl@40859
  1046
  let "?XP x p" = "real (joint_distribution X P {(x, p)})"
hoelzl@40859
  1047
  let "?X x" = "real (distribution X {x})"
hoelzl@40859
  1048
  let "?P p" = "real (distribution P {p})"
hoelzl@40859
  1049
  note fX = sf(1)[THEN simple_function_imp_finite_random_variable]
hoelzl@40859
  1050
  note fP = sf(2)[THEN simple_function_imp_finite_random_variable]
hoelzl@40859
  1051
  note fX[THEN finite_distribution_finite, simp] and fP[THEN finite_distribution_finite, simp]
hoelzl@40859
  1052
  note finite_distribution_order[OF fX fP, simp]
hoelzl@38656
  1053
  have "(\<Sum>x\<in>X ` space M. real (distribution X {x}) * log b (real (distribution X {x}))) =
hoelzl@36624
  1054
    (\<Sum>y\<in>P `space M. \<Sum>x\<in>X ` space M.
hoelzl@38656
  1055
    real (joint_distribution X P {(x, y)}) * log b (real (joint_distribution X P {(x, y)})))"
hoelzl@36624
  1056
  proof (subst setsum_image_split[OF svi],
hoelzl@40859
  1057
      safe intro!: setsum_mono_zero_cong_left imageI)
hoelzl@40859
  1058
    show "finite (X ` space M)" "finite (X ` space M)" "finite (P ` space M)"
hoelzl@40859
  1059
      using sf unfolding simple_function_def by auto
hoelzl@40859
  1060
  next
hoelzl@36624
  1061
    fix p x assume in_space: "p \<in> space M" "x \<in> space M"
hoelzl@38656
  1062
    assume "real (joint_distribution X P {(X x, P p)}) * log b (real (joint_distribution X P {(X x, P p)})) \<noteq> 0"
hoelzl@36624
  1063
    hence "(\<lambda>x. (X x, P x)) -` {(X x, P p)} \<inter> space M \<noteq> {}" by (auto simp: distribution_def)
hoelzl@36624
  1064
    with svi[unfolded subvimage_def, rule_format, OF `x \<in> space M`]
hoelzl@36624
  1065
    show "x \<in> P -` {P p}" by auto
hoelzl@36624
  1066
  next
hoelzl@36624
  1067
    fix p x assume in_space: "p \<in> space M" "x \<in> space M"
hoelzl@36624
  1068
    assume "P x = P p"
hoelzl@36624
  1069
    from this[symmetric] svi[unfolded subvimage_def, rule_format, OF `x \<in> space M`]
hoelzl@36624
  1070
    have "X -` {X x} \<inter> space M \<subseteq> P -` {P p} \<inter> space M"
hoelzl@36624
  1071
      by auto
hoelzl@36624
  1072
    hence "(\<lambda>x. (X x, P x)) -` {(X x, P p)} \<inter> space M = X -` {X x} \<inter> space M"
hoelzl@36624
  1073
      by auto
hoelzl@38656
  1074
    thus "real (distribution X {X x}) * log b (real (distribution X {X x})) =
hoelzl@38656
  1075
          real (joint_distribution X P {(X x, P p)}) *
hoelzl@38656
  1076
          log b (real (joint_distribution X P {(X x, P p)}))"
hoelzl@36624
  1077
      by (auto simp: distribution_def)
hoelzl@36624
  1078
  qed
hoelzl@40859
  1079
  moreover have "\<And>x y. real (joint_distribution X P {(x, y)}) *
hoelzl@40859
  1080
      log b (real (joint_distribution X P {(x, y)}) / real (distribution P {y})) =
hoelzl@40859
  1081
      real (joint_distribution X P {(x, y)}) * log b (real (joint_distribution X P {(x, y)})) -
hoelzl@40859
  1082
      real (joint_distribution X P {(x, y)}) * log b (real (distribution P {y}))"
hoelzl@40859
  1083
    by (auto simp add: log_simps zero_less_mult_iff field_simps)
hoelzl@40859
  1084
  ultimately show ?thesis
hoelzl@40859
  1085
    unfolding sf[THEN entropy_eq] conditional_entropy_eq[OF sf]
hoelzl@40859
  1086
    using setsum_real_joint_distribution_singleton[OF fX fP]
hoelzl@38656
  1087
    by (simp add: setsum_cartesian_product' setsum_subtractf setsum_real_distribution
hoelzl@36624
  1088
      setsum_left_distrib[symmetric] setsum_commute[where B="P`space M"])
hoelzl@36624
  1089
qed
hoelzl@36624
  1090
hoelzl@40859
  1091
corollary (in information_space) entropy_data_processing:
hoelzl@40859
  1092
  assumes X: "simple_function X" shows "\<H>(f \<circ> X) \<le> \<H>(X)"
hoelzl@40859
  1093
proof -
hoelzl@40859
  1094
  note X
hoelzl@40859
  1095
  moreover have fX: "simple_function (f \<circ> X)" using X by auto
hoelzl@40859
  1096
  moreover have "subvimage (space M) X (f \<circ> X)" by auto
hoelzl@40859
  1097
  ultimately have "\<H>(X) = \<H>(f\<circ>X) + \<H>(X|f\<circ>X)" by (rule entropy_partition)
hoelzl@40859
  1098
  then show "\<H>(f \<circ> X) \<le> \<H>(X)"
hoelzl@40859
  1099
    by (auto intro: conditional_entropy_positive[OF X fX])
hoelzl@40859
  1100
qed
hoelzl@36624
  1101
hoelzl@40859
  1102
corollary (in information_space) entropy_of_inj:
hoelzl@40859
  1103
  assumes X: "simple_function X" and inj: "inj_on f (X`space M)"
hoelzl@36624
  1104
  shows "\<H>(f \<circ> X) = \<H>(X)"
hoelzl@36624
  1105
proof (rule antisym)
hoelzl@40859
  1106
  show "\<H>(f \<circ> X) \<le> \<H>(X)" using entropy_data_processing[OF X] .
hoelzl@36624
  1107
next
hoelzl@40859
  1108
  have sf: "simple_function (f \<circ> X)"
hoelzl@40859
  1109
    using X by auto
hoelzl@36624
  1110
  have "\<H>(X) = \<H>(the_inv_into (X`space M) f \<circ> (f \<circ> X))"
hoelzl@40859
  1111
    by (auto intro!: mutual_information_cong simp: entropy_def the_inv_into_f_f[OF inj])
hoelzl@36624
  1112
  also have "... \<le> \<H>(f \<circ> X)"
hoelzl@40859
  1113
    using entropy_data_processing[OF sf] .
hoelzl@36624
  1114
  finally show "\<H>(X) \<le> \<H>(f \<circ> X)" .
hoelzl@36624
  1115
qed
hoelzl@36624
  1116
hoelzl@36080
  1117
end