src/HOL/Probability/Information.thy
author hoelzl
Wed Feb 02 12:34:45 2011 +0100 (2011-02-02)
changeset 41689 3e39b0e730d6
parent 41661 baf1964bc468
child 41833 563bea92b2c0
permissions -rw-r--r--
the measure valuation is again part of the measure_space type, instead of an explicit parameter to the locale;
changed syntax for simple_function, simple_integral, positive_integral, integral and RN_deriv.
introduced binder variants for simple_integral, positive_integral and integral.
hoelzl@36080
     1
theory Information
wenzelm@41413
     2
imports
wenzelm@41413
     3
  Probability_Space
wenzelm@41413
     4
  "~~/src/HOL/Library/Convex"
wenzelm@41413
     5
  Lebesgue_Measure
hoelzl@36080
     6
begin
hoelzl@36080
     7
hoelzl@39097
     8
(* Monotonicity of the logarithm: for a base a > 1 and 0 < x <= y we get
   log a x <= log a y.  Proved by rewriting the goal with log_le_cancel_iff
   and discharging the side conditions with auto. *)
lemma log_le: "1 < a \<Longrightarrow> 0 < x \<Longrightarrow> x \<le> y \<Longrightarrow> log a x \<le> log a y"
hoelzl@39097
     9
  by (subst log_le_cancel_iff) auto
hoelzl@39097
    10
hoelzl@39097
    11
(* Strict monotonicity of the logarithm: for a base a > 1 and 0 < x < y we
   get log a x < log a y.  Proved by rewriting with log_less_cancel_iff. *)
lemma log_less: "1 < a \<Longrightarrow> 0 < x \<Longrightarrow> x < y \<Longrightarrow> log a x < log a y"
hoelzl@39097
    12
  by (subst log_less_cancel_iff) auto
hoelzl@39097
    13
hoelzl@39097
    14
(* A sum over the Cartesian product of A and B, written with a single
   pair-valued index, equals the nested sum over A and then B.
   Obtained from setsum_cartesian_product plus simp. *)
lemma setsum_cartesian_product':
hoelzl@39097
    15
  "(\<Sum>x\<in>A \<times> B. f x) = (\<Sum>x\<in>A. setsum (\<lambda>y. f (x, y)) B)"
hoelzl@39097
    16
  unfolding setsum_cartesian_product by simp
hoelzl@39097
    17
hoelzl@36624
    18
section "Convex theory"
hoelzl@36080
    19
hoelzl@36624
    20
(* Jensen-style inequality for the logarithm.
   Assumptions: s is finite and non-empty, the base b is > 1, the weights
   a i are non-negative on s and sum to 1, and every y i (i in s) lies in
   the open interval {0 <..}.
   Conclusion: the weighted sum of log b (y i) is at most log b of the
   weighted sum of the y i. *)
lemma log_setsum:
hoelzl@36624
    21
  assumes "finite s" "s \<noteq> {}"
hoelzl@36624
    22
  assumes "b > 1"
hoelzl@36624
    23
  assumes "(\<Sum> i \<in> s. a i) = 1"
hoelzl@36624
    24
  assumes "\<And> i. i \<in> s \<Longrightarrow> a i \<ge> 0"
hoelzl@36624
    25
  assumes "\<And> i. i \<in> s \<Longrightarrow> y i \<in> {0 <..}"
hoelzl@36624
    26
  shows "log b (\<Sum> i \<in> s. a i * y i) \<ge> (\<Sum> i \<in> s. a i * log b (y i))"
hoelzl@36624
    27
proof -
hoelzl@36624
    28
  (* The negated logarithm is convex on the positive reals. *)
  have "convex_on {0 <..} (\<lambda> x. - log b x)"
hoelzl@36624
    29
    by (rule minus_log_convex[OF `b > 1`])
hoelzl@36624
    30
  (* Instantiate convex_on_setsum with the negated logarithm. *)
  hence "- log b (\<Sum> i \<in> s. a i * y i) \<le> (\<Sum> i \<in> s. a i * - log b (y i))"
hoelzl@36624
    31
    using convex_on_setsum[of _ _ "\<lambda> x. - log b x"] assms pos_is_convex by fastsimp
hoelzl@36624
    32
  (* Pull the minus sign out of the sum and flip the inequality. *)
  thus ?thesis by (auto simp add:setsum_negf le_imp_neg_le)
hoelzl@36624
    33
qed
hoelzl@36080
    34
hoelzl@36624
    35
(* Variant of log_setsum with a weaker positivity requirement: y i only has
   to be strictly positive for those i in s whose weight a i is strictly
   positive.  The proof restricts both sums to s - {i. a i = 0} and applies
   log_setsum on that smaller index set. *)
lemma log_setsum':
hoelzl@36624
    36
  assumes "finite s" "s \<noteq> {}"
hoelzl@36624
    37
  assumes "b > 1"
hoelzl@36624
    38
  assumes "(\<Sum> i \<in> s. a i) = 1"
hoelzl@36624
    39
  assumes pos: "\<And> i. i \<in> s \<Longrightarrow> 0 \<le> a i"
hoelzl@36624
    40
          "\<And> i. \<lbrakk> i \<in> s ; 0 < a i \<rbrakk> \<Longrightarrow> 0 < y i"
hoelzl@36624
    41
  shows "log b (\<Sum> i \<in> s. a i * y i) \<ge> (\<Sum> i \<in> s. a i * log b (y i))"
hoelzl@36080
    42
proof -
hoelzl@36624
    43
  (* Indices with weight 0 contribute nothing, so they may be dropped from
     any sum of the form a i * y i (stated for arbitrary y). *)
  have "\<And>y. (\<Sum> i \<in> s - {i. a i = 0}. a i * y i) = (\<Sum> i \<in> s. a i * y i)"
hoelzl@36624
    44
    using assms by (auto intro!: setsum_mono_zero_cong_left)
hoelzl@36624
    45
  moreover have "log b (\<Sum> i \<in> s - {i. a i = 0}. a i * y i) \<ge> (\<Sum> i \<in> s - {i. a i = 0}. a i * log b (y i))"
hoelzl@36624
    46
  proof (rule log_setsum)
hoelzl@36624
    47
    have "setsum a (s - {i. a i = 0}) = setsum a s"
hoelzl@36624
    48
      using assms(1) by (rule setsum_mono_zero_cong_left) auto
hoelzl@36624
    49
    thus sum_1: "setsum a (s - {i. a i = 0}) = 1"
hoelzl@36624
    50
      "finite (s - {i. a i = 0})" using assms by simp_all
hoelzl@36624
    51
hoelzl@36624
    52
    (* The restricted index set is non-empty: otherwise the weights would
       sum to 0, contradicting sum_1. *)
    show "s - {i. a i = 0} \<noteq> {}"
hoelzl@36624
    53
    proof
hoelzl@36624
    54
      assume *: "s - {i. a i = 0} = {}"
hoelzl@36624
    55
      hence "setsum a (s - {i. a i = 0}) = 0" by (simp add: * setsum_empty)
hoelzl@36624
    56
      with sum_1 show False by simp
hoelzl@38656
    57
    qed
hoelzl@36624
    58
hoelzl@36624
    59
    fix i assume "i \<in> s - {i. a i = 0}"
hoelzl@36624
    60
    hence "i \<in> s" "a i \<noteq> 0" by simp_all
hoelzl@36624
    61
    thus "0 \<le> a i" "y i \<in> {0<..}" using pos[of i] by auto
hoelzl@36624
    62
  qed fact+
hoelzl@36624
    63
  ultimately show ?thesis by simp
hoelzl@36080
    64
qed
hoelzl@36080
    65
hoelzl@36624
    66
(* Bound used later for the non-negativity of the KL divergence.
   Assumptions: S finite and non-empty, 1 < b, the g x sum to 1 over S,
   g and f are non-negative on S, and f x > 0 whenever g x > 0.
   Conclusion: - (SUM x:S. g x * log b (g x / f x)) <= log b (SUM x:S. f x).
   Proof outline (a calculation chain):
     1. rewrite the negated sum as SUM g x * log b (f x / g x);
     2. bound it by log b (SUM g x * (f x / g x)) using log_setsum';
     3. simplify that sum to the sum of f over S - {x. g x = 0};
     4. enlarge the index set to S using monotonicity of log. *)
lemma log_setsum_divide:
hoelzl@36624
    67
  assumes "finite S" and "S \<noteq> {}" and "1 < b"
hoelzl@36624
    68
  assumes "(\<Sum>x\<in>S. g x) = 1"
hoelzl@36624
    69
  assumes pos: "\<And>x. x \<in> S \<Longrightarrow> g x \<ge> 0" "\<And>x. x \<in> S \<Longrightarrow> f x \<ge> 0"
hoelzl@36624
    70
  assumes g_pos: "\<And>x. \<lbrakk> x \<in> S ; 0 < g x \<rbrakk> \<Longrightarrow> 0 < f x"
hoelzl@36624
    71
  shows "- (\<Sum>x\<in>S. g x * log b (g x / f x)) \<le> log b (\<Sum>x\<in>S. f x)"
hoelzl@36624
    72
proof -
hoelzl@36624
    73
  (* Local monotonicity fact for the fixed base b. *)
  have log_mono: "\<And>x y. 0 < x \<Longrightarrow> x \<le> y \<Longrightarrow> log b x \<le> log b y"
hoelzl@36624
    74
    using `1 < b` by (subst log_le_cancel_iff) auto
hoelzl@36080
    75
hoelzl@36624
    76
  (* Step 1: move the minus sign into the logarithm, summand by summand. *)
  have "- (\<Sum>x\<in>S. g x * log b (g x / f x)) = (\<Sum>x\<in>S. g x * log b (f x / g x))"
hoelzl@36624
    77
  proof (unfold setsum_negf[symmetric], rule setsum_cong)
hoelzl@36624
    78
    fix x assume x: "x \<in> S"
hoelzl@36624
    79
    show "- (g x * log b (g x / f x)) = g x * log b (f x / g x)"
hoelzl@36624
    80
    proof (cases "g x = 0")
hoelzl@36624
    81
      case False
hoelzl@36624
    82
      with pos[OF x] g_pos[OF x] have "0 < f x" "0 < g x" by simp_all
hoelzl@36624
    83
      thus ?thesis using `1 < b` by (simp add: log_divide field_simps)
hoelzl@36624
    84
    qed simp
hoelzl@36624
    85
  qed rule
hoelzl@36624
    86
  (* Step 2: apply log_setsum' with weights g x and values f x / g x. *)
  also have "... \<le> log b (\<Sum>x\<in>S. g x * (f x / g x))"
hoelzl@36624
    87
  proof (rule log_setsum')
hoelzl@36624
    88
    fix x assume x: "x \<in> S" "0 < g x"
hoelzl@36624
    89
    with g_pos[OF x] show "0 < f x / g x" by (safe intro!: divide_pos_pos)
hoelzl@36624
    90
  qed fact+
hoelzl@36624
    91
  (* Step 3: the summands with g x = 0 vanish; the rest simplify to f x. *)
  also have "... = log b (\<Sum>x\<in>S - {x. g x = 0}. f x)" using `finite S`
hoelzl@36624
    92
    by (auto intro!: setsum_mono_zero_cong_right arg_cong[where f="log b"]
hoelzl@36624
    93
        split: split_if_asm)
hoelzl@36624
    94
  (* Step 4: log_mono needs the smaller sum to be strictly positive and
     bounded by the full sum. *)
  also have "... \<le> log b (\<Sum>x\<in>S. f x)"
hoelzl@36624
    95
  proof (rule log_mono)
hoelzl@36624
    96
    have "0 = (\<Sum>x\<in>S - {x. g x = 0}. 0)" by simp
hoelzl@36624
    97
    also have "... < (\<Sum>x\<in>S - {x. g x = 0}. f x)" (is "_ < ?sum")
hoelzl@36624
    98
    proof (rule setsum_strict_mono)
hoelzl@36624
    99
      show "finite (S - {x. g x = 0})" using `finite S` by simp
hoelzl@36624
   100
      show "S - {x. g x = 0} \<noteq> {}"
hoelzl@36624
   101
      proof
hoelzl@36624
   102
        assume "S - {x. g x = 0} = {}"
hoelzl@36624
   103
        hence "(\<Sum>x\<in>S. g x) = 0" by (subst setsum_0') auto
hoelzl@36624
   104
        with `(\<Sum>x\<in>S. g x) = 1` show False by simp
hoelzl@36624
   105
      qed
hoelzl@36624
   106
      fix x assume "x \<in> S - {x. g x = 0}"
hoelzl@36624
   107
      thus "0 < f x" using g_pos[of x] pos(1)[of x] by auto
hoelzl@36624
   108
    qed
hoelzl@36624
   109
    finally show "0 < ?sum" .
hoelzl@36624
   110
    show "(\<Sum>x\<in>S - {x. g x = 0}. f x) \<le> (\<Sum>x\<in>S. f x)"
hoelzl@36624
   111
      using `finite S` pos by (auto intro!: setsum_mono2)
hoelzl@36080
   112
  qed
hoelzl@36624
   113
  finally show ?thesis .
hoelzl@36080
   114
qed
hoelzl@36080
   115
hoelzl@39097
   116
(* Two rewrite facts that turn an equation between a pair (A, B) and a value
   X (in either orientation) into the conjunction of the component-wise
   equations fst X = A and snd X = B. *)
lemma split_pairs:
hoelzl@40859
   117
  "((A, B) = X) \<longleftrightarrow> (fst X = A \<and> snd X = B)" and
hoelzl@40859
   118
  "(X = (A, B)) \<longleftrightarrow> (fst X = A \<and> snd X = B)" by auto
hoelzl@38656
   119
hoelzl@38656
   120
section "Information theory"
hoelzl@38656
   121
hoelzl@40859
   122
(* Locale extending prob_space with a fixed real number b and the assumption
   1 < b (named b_gt_1).  In the rest of this theory b is passed as the base
   argument of log, KL_divergence, mutual_information and entropy. *)
locale information_space = prob_space +
hoelzl@38656
   123
  fixes b :: real assumes b_gt_1: "1 < b"
hoelzl@38656
   124
hoelzl@40859
   125
(* Simplification rules for log b inside the information_space locale.
   They are stated as unconditional equations with an if-then-else on the
   right-hand side, so that they can be used without positivity premises;
   the facts are bundled as log_simps at the end of the context. *)
context information_space
hoelzl@38656
   126
begin
hoelzl@38656
   127
hoelzl@40859
   128
text {* Introduce some simplification rules for logarithm of base @{term b}. *}
hoelzl@40859
   129
hoelzl@40859
   130
(* For every non-positive x, log b x equals log b 0.  The proof shows that no
   value exp u can equal x and then unfolds log_def and ln_def. *)
lemma log_neg_const:
hoelzl@40859
   131
  assumes "x \<le> 0"
hoelzl@40859
   132
  shows "log b x = log b 0"
hoelzl@36624
   133
proof -
hoelzl@40859
   134
  { fix u :: real
hoelzl@40859
   135
    have "x \<le> 0" by fact
hoelzl@40859
   136
    also have "0 < exp u"
hoelzl@40859
   137
      using exp_gt_zero .
hoelzl@40859
   138
    finally have "exp u \<noteq> x"
hoelzl@40859
   139
      by auto }
hoelzl@40859
   140
  then show "log b x = log b 0"
hoelzl@40859
   141
    by (simp add: log_def ln_def)
hoelzl@38656
   142
qed
hoelzl@38656
   143
hoelzl@40859
   144
(* Logarithm of a product: if the product is positive it splits into the sum
   of the logs of the absolute values; otherwise it is log b 0. *)
lemma log_mult_eq:
hoelzl@40859
   145
  "log b (A * B) = (if 0 < A * B then log b \<bar>A\<bar> + log b \<bar>B\<bar> else log b 0)"
hoelzl@40859
   146
  using log_mult[of b "\<bar>A\<bar>" "\<bar>B\<bar>"] b_gt_1 log_neg_const[of "A * B"]
hoelzl@40859
   147
  by (auto simp: zero_less_mult_iff mult_le_0_iff)
hoelzl@38656
   148
hoelzl@40859
   149
(* Logarithm of an inverse: - log b B for positive B, log b 0 otherwise. *)
lemma log_inverse_eq:
hoelzl@40859
   150
  "log b (inverse B) = (if 0 < B then - log b B else log b 0)"
hoelzl@40859
   151
  using log_inverse[of b B] log_neg_const[of "inverse B"] b_gt_1 by simp
hoelzl@36080
   152
hoelzl@40859
   153
(* Logarithm of a quotient, derived from log_mult_eq and log_inverse_eq after
   unfolding division into multiplication by the inverse.  Note that the
   condition is stated on the product A * B, not on the quotient. *)
lemma log_divide_eq:
hoelzl@40859
   154
  "log b (A / B) = (if 0 < A * B then log b \<bar>A\<bar> - log b \<bar>B\<bar> else log b 0)"
hoelzl@40859
   155
  unfolding divide_inverse log_mult_eq log_inverse_eq abs_inverse
hoelzl@40859
   156
  by (auto simp: zero_less_mult_iff mult_le_0_iff)
hoelzl@38656
   157
hoelzl@40859
   158
(* Collection of the three rewrite rules above under a single name. *)
lemmas log_simps = log_mult_eq log_inverse_eq log_divide_eq
hoelzl@38656
   159
hoelzl@38656
   160
end
hoelzl@38656
   161
hoelzl@39097
   162
subsection "Kullback$-$Leibler divergence"
hoelzl@36080
   163
hoelzl@39097
   164
text {* The Kullback$-$Leibler divergence is also known as relative entropy or
hoelzl@39097
   165
Kullback$-$Leibler distance. *}
hoelzl@39097
   166
hoelzl@39097
   167
(* KL_divergence b M nu is defined as the integral of
   log b (real (RN_deriv M nu x)) over x, taken with respect to the structure
   M whose measure field has been replaced by nu. *)
definition
hoelzl@41689
   168
  "KL_divergence b M \<nu> = \<integral>x. log b (real (RN_deriv M \<nu> x)) \<partial>M\<lparr>measure := \<nu>\<rparr>"
hoelzl@38656
   169
hoelzl@40859
   170
(* Congruence rule for KL_divergence, stated in sigma_finite_measure.
   Assumptions: M with measure nu is a measure space; N has the same sets and
   space as M; the measure of N agrees with mu on sets M; nu agrees with nu'
   on sets M.  Conclusion: the KL divergences of (M, nu) and (N, nu') are
   equal.  The proof first exchanges the RN_deriv integrand and then the
   measure being integrated against. *)
lemma (in sigma_finite_measure) KL_divergence_cong:
hoelzl@41689
   171
  assumes "measure_space (M\<lparr>measure := \<nu>\<rparr>)" (is "measure_space ?\<nu>")
hoelzl@41689
   172
  assumes [simp]: "sets N = sets M" "space N = space M"
hoelzl@41689
   173
    "\<And>A. A \<in> sets M \<Longrightarrow> measure N A = \<mu> A"
hoelzl@41689
   174
    "\<And>A. A \<in> sets M \<Longrightarrow> \<nu> A = \<nu>' A"
hoelzl@41689
   175
  shows "KL_divergence b M \<nu> = KL_divergence b N \<nu>'"
hoelzl@40859
   176
proof -
hoelzl@41689
   177
  interpret \<nu>: measure_space ?\<nu> by fact
hoelzl@41689
   178
  have "KL_divergence b M \<nu> = \<integral>x. log b (real (RN_deriv N \<nu>' x)) \<partial>?\<nu>"
hoelzl@41689
   179
    by (simp cong: RN_deriv_cong \<nu>.integral_cong add: KL_divergence_def)
hoelzl@41689
   180
  also have "\<dots> = KL_divergence b N \<nu>'"
hoelzl@41689
   181
    by (auto intro!: \<nu>.integral_cong_measure[symmetric] simp: KL_divergence_def)
hoelzl@41689
   182
  finally show ?thesis .
hoelzl@40859
   183
qed
hoelzl@40859
   184
hoelzl@38656
   185
(* Closed form of the KL divergence in a finite measure space.
   Assumptions: M with measure nu is again a finite_measure_space, and nu is
   absolutely continuous.  Conclusion: the divergence equals the finite sum
   over x in space M of
     real (nu {x}) * log b (real (nu {x}) / real (mu {x})).
   The initial proof method unfolds the definition and rewrites the integral
   with integral_finite_singleton; the remaining goal is closed using
   RN_deriv_finite_measure. *)
lemma (in finite_measure_space) KL_divergence_eq_finite:
hoelzl@41689
   186
  assumes v: "finite_measure_space (M\<lparr>measure := \<nu>\<rparr>)"
hoelzl@40859
   187
  assumes ac: "absolutely_continuous \<nu>"
hoelzl@41689
   188
  shows "KL_divergence b M \<nu> = (\<Sum>x\<in>space M. real (\<nu> {x}) * log b (real (\<nu> {x}) / real (\<mu> {x})))" (is "_ = ?sum")
hoelzl@38656
   189
proof (simp add: KL_divergence_def finite_measure_space.integral_finite_singleton[OF v])
hoelzl@41689
   190
  interpret v: finite_measure_space "M\<lparr>measure := \<nu>\<rparr>" by fact
hoelzl@41689
   191
  have ms: "measure_space (M\<lparr>measure := \<nu>\<rparr>)" by default
hoelzl@41689
   192
  show "(\<Sum>x \<in> space M. log b (real (RN_deriv M \<nu> x)) * real (\<nu> {x})) = ?sum"
hoelzl@38656
   193
    using RN_deriv_finite_measure[OF ms ac]
hoelzl@41023
   194
    by (auto intro!: setsum_cong simp: field_simps real_of_pextreal_mult[symmetric])
hoelzl@38656
   195
qed
hoelzl@36080
   196
hoelzl@38656
   197
(* Non-negativity of the KL divergence in a finite probability space.
   Assumptions: M with measure nu is a finite_prob_space, nu is absolutely
   continuous, and 1 < b.
   Proof: rewrite the divergence with KL_divergence_eq_finite, bound its
   negation by log b (SUM x:space M. real (mu {x})) via log_setsum_divide,
   and finish with finite_sum_over_space_eq_1. *)
lemma (in finite_prob_space) KL_divergence_positive_finite:
hoelzl@41689
   198
  assumes v: "finite_prob_space (M\<lparr>measure := \<nu>\<rparr>)"
hoelzl@40859
   199
  assumes ac: "absolutely_continuous \<nu>"
hoelzl@38656
   200
  and "1 < b"
hoelzl@41689
   201
  shows "0 \<le> KL_divergence b M \<nu>"
hoelzl@38656
   202
proof -
hoelzl@41689
   203
  interpret v: finite_prob_space "M\<lparr>measure := \<nu>\<rparr>" by fact
hoelzl@41689
   204
  have ms: "finite_measure_space (M\<lparr>measure := \<nu>\<rparr>)" by default
hoelzl@38656
   205
hoelzl@41689
   206
  have "- (KL_divergence b M \<nu>) \<le> log b (\<Sum>x\<in>space M. real (\<mu> {x}))"
hoelzl@40859
   207
  proof (subst KL_divergence_eq_finite[OF ms ac], safe intro!: log_setsum_divide not_empty)
hoelzl@40859
   208
    show "finite (space M)" using finite_space by simp
hoelzl@40859
   209
    show "1 < b" by fact
hoelzl@40859
   210
    show "(\<Sum>x\<in>space M. real (\<nu> {x})) = 1" using v.finite_sum_over_space_eq_1 by simp
hoelzl@38656
   211
hoelzl@40859
   212
    fix x assume "x \<in> space M"
hoelzl@40859
   213
    then have x: "{x} \<in> sets M" unfolding sets_eq_Pow by auto
hoelzl@40859
   214
    (* g_pos premise of log_setsum_divide: a point with positive nu-mass
       has non-zero mu-mass by absolute continuity, hence positive prob. *)
    { assume "0 < real (\<nu> {x})"
hoelzl@40859
   215
      then have "\<nu> {x} \<noteq> 0" by auto
hoelzl@40859
   216
      then have "\<mu> {x} \<noteq> 0"
hoelzl@40859
   217
        using ac[unfolded absolutely_continuous_def, THEN bspec, of "{x}"] x by auto
hoelzl@40859
   218
      thus "0 < prob {x}" using finite_measure[of "{x}"] x by auto }
hoelzl@40859
   219
  qed auto
hoelzl@41689
   220
  thus "0 \<le> KL_divergence b M \<nu>" using finite_sum_over_space_eq_1 by simp
hoelzl@36080
   221
qed
hoelzl@36080
   222
hoelzl@39097
   223
subsection {* Mutual Information *}
hoelzl@39097
   224
hoelzl@36080
   225
(* Mutual information of X and Y relative to the structures S and T:
   the KL divergence (base b) of joint_distribution X Y with respect to the
   structure obtained by combining S (measure set to distribution X) and
   T (measure set to distribution Y) with the infix product operator.
   NOTE(review): that operator is presumably the product measure space
   defined in an imported theory -- confirm there. *)
definition (in prob_space)
hoelzl@38656
   226
  "mutual_information b S T X Y =
hoelzl@41689
   227
    KL_divergence b (S\<lparr>measure := distribution X\<rparr> \<Otimes>\<^isub>M T\<lparr>measure := distribution Y\<rparr>)
hoelzl@41689
   228
      (joint_distribution X Y)"
hoelzl@36080
   229
hoelzl@40859
   230
(* Entropy of X relative to the structure s, defined as the mutual
   information of X with itself (s is used for both components). *)
definition (in prob_space)
hoelzl@40859
   231
  "entropy b s X = mutual_information b s s X X"
hoelzl@40859
   232
hoelzl@40859
   233
(* Notation for mutual_information with the locale's base b, where each of
   the two structures is built from the image of space M under the random
   variable: the space is that image, the sets are its power set, and the
   measure is the corresponding distribution. *)
abbreviation (in information_space)
hoelzl@40859
   234
  mutual_information_Pow ("\<I>'(_ ; _')") where
hoelzl@36624
   235
  "\<I>(X ; Y) \<equiv> mutual_information b
hoelzl@41689
   236
    \<lparr> space = X`space M, sets = Pow (X`space M), measure = distribution X \<rparr>
hoelzl@41689
   237
    \<lparr> space = Y`space M, sets = Pow (Y`space M), measure = distribution Y \<rparr> X Y"
hoelzl@41689
   238
hoelzl@41689
   239
(* Simp rule: replacing the measure field of M' does not change whether M'
   satisfies the algebra predicate.  Proved by unfolding algebra_def. *)
lemma algebra_measure_update[simp]:
hoelzl@41689
   240
  "algebra (M'\<lparr>measure := m\<rparr>) \<longleftrightarrow> algebra M'"
hoelzl@41689
   241
  unfolding algebra_def by simp
hoelzl@41689
   242
hoelzl@41689
   243
(* Simp rule: replacing the measure field of M' does not change whether M'
   satisfies the sigma_algebra predicate.  Proved by unfolding
   sigma_algebra_def and sigma_algebra_axioms_def. *)
lemma sigma_algebra_measure_update[simp]:
hoelzl@41689
   244
  "sigma_algebra (M'\<lparr>measure := m\<rparr>) \<longleftrightarrow> sigma_algebra M'"
hoelzl@41689
   245
  unfolding sigma_algebra_def sigma_algebra_axioms_def by simp
hoelzl@41689
   246
hoelzl@41689
   247
(* Simp rule: replacing the measure field of M' does not change whether M'
   satisfies the finite_sigma_algebra predicate.  Proved by unfolding
   finite_sigma_algebra_def and finite_sigma_algebra_axioms_def. *)
lemma finite_sigma_algebra_measure_update[simp]:
hoelzl@41689
   248
  "finite_sigma_algebra (M'\<lparr>measure := m\<rparr>) \<longleftrightarrow> finite_sigma_algebra M'"
hoelzl@41689
   249
  unfolding finite_sigma_algebra_def finite_sigma_algebra_axioms_def by simp
hoelzl@36080
   250
hoelzl@40859
   251
(* For finite random variables X (into S) and Y (into T), the joint
   distribution of X and Y is absolutely continuous with respect to the
   combined structure of S and T carrying distribution X and distribution Y.
   Proof idea: via XY.absolutely_continuousI it suffices to consider single
   points.  For a point x = (a, b) whose measure in XY is 0, the product
   distribution X {a} * distribution Y {b} is 0; the joint distribution of
   {x} is bounded by each of the two factors, so it is 0 as well. *)
lemma (in prob_space) finite_variables_absolutely_continuous:
hoelzl@40859
   252
  assumes X: "finite_random_variable S X" and Y: "finite_random_variable T Y"
hoelzl@41689
   253
  shows "measure_space.absolutely_continuous
hoelzl@41689
   254
    (S\<lparr>measure := distribution X\<rparr> \<Otimes>\<^isub>M T\<lparr>measure := distribution Y\<rparr>)
hoelzl@41689
   255
    (joint_distribution X Y)"
hoelzl@40859
   256
proof -
hoelzl@41689
   257
  (* Locale interpretations for the two marginals, their pairing, and the
     pairing equipped with the joint distribution. *)
  interpret X: finite_prob_space "S\<lparr>measure := distribution X\<rparr>"
hoelzl@41689
   258
    using X by (rule distribution_finite_prob_space)
hoelzl@41689
   259
  interpret Y: finite_prob_space "T\<lparr>measure := distribution Y\<rparr>"
hoelzl@41689
   260
    using Y by (rule distribution_finite_prob_space)
hoelzl@41689
   261
  interpret XY: pair_finite_prob_space
hoelzl@41689
   262
    "S\<lparr>measure := distribution X\<rparr>" "T\<lparr> measure := distribution Y\<rparr>" by default
hoelzl@41689
   263
  interpret P: finite_prob_space "XY.P\<lparr> measure := joint_distribution X Y\<rparr>"
hoelzl@41689
   264
    using assms by (auto intro!: joint_distribution_finite_prob_space)
hoelzl@41689
   265
  note rv = assms[THEN finite_random_variableD]
hoelzl@40859
   266
  show "XY.absolutely_continuous (joint_distribution X Y)"
hoelzl@40859
   267
  proof (rule XY.absolutely_continuousI)
hoelzl@41689
   268
    show "finite_measure_space (XY.P\<lparr> measure := joint_distribution X Y\<rparr>)" by default
hoelzl@41689
   269
    fix x assume "x \<in> space XY.P" and "XY.\<mu> {x} = 0"
hoelzl@40859
   270
    then obtain a b where "(a, b) = x" and "a \<in> space S" "b \<in> space T"
hoelzl@40859
   271
      and distr: "distribution X {a} * distribution Y {b} = 0"
hoelzl@41689
   272
      by (cases x) (auto simp: space_pair_measure)
hoelzl@41689
   273
    with X.sets_eq_Pow Y.sets_eq_Pow
hoelzl@41689
   274
      joint_distribution_Times_le_fst[OF rv, of "{a}" "{b}"]
hoelzl@41689
   275
      joint_distribution_Times_le_snd[OF rv, of "{a}" "{b}"]
hoelzl@40859
   276
    have "joint_distribution X Y {x} \<le> distribution Y {b}"
hoelzl@40859
   277
         "joint_distribution X Y {x} \<le> distribution X {a}"
hoelzl@41689
   278
      by (auto simp del: X.sets_eq_Pow Y.sets_eq_Pow)
hoelzl@40859
   279
    with distr show "joint_distribution X Y {x} = 0" by auto
hoelzl@40859
   280
  qed
hoelzl@40859
   281
qed
hoelzl@40859
   282
hoelzl@40859
   283
(* Two facts about mutual information of finite random variables X (into MX)
   and Y (into MY), proved together because they share the same locale
   interpretations:
     mutual_information_generic_eq -- the mutual information equals the sum,
       over all (x, y) in space MX x space MY, of
       p(x,y) * log b (p(x,y) / (p(x) * p(y))), where p(x,y) is the real
       value of joint_distribution X Y {(x,y)} and p(x), p(y) are the real
       values of distribution X {x} and distribution Y {y};
     mutual_information_positive_generic -- the mutual information is
       non-negative.
   Both follow from the finite KL divergence lemmas together with
   finite_variables_absolutely_continuous. *)
lemma (in information_space)
hoelzl@40859
   284
  assumes MX: "finite_random_variable MX X"
hoelzl@40859
   285
  assumes MY: "finite_random_variable MY Y"
hoelzl@40859
   286
  shows mutual_information_generic_eq:
hoelzl@36624
   287
    "mutual_information b MX MY X Y = (\<Sum> (x,y) \<in> space MX \<times> space MY.
hoelzl@38656
   288
      real (joint_distribution X Y {(x,y)}) *
hoelzl@38656
   289
      log b (real (joint_distribution X Y {(x,y)}) /
hoelzl@38656
   290
      (real (distribution X {x}) * real (distribution Y {y}))))"
hoelzl@40859
   291
    (is ?sum)
hoelzl@36624
   292
  and mutual_information_positive_generic:
hoelzl@40859
   293
     "0 \<le> mutual_information b MX MY X Y" (is ?positive)
hoelzl@36624
   294
proof -
hoelzl@41689
   295
  interpret X: finite_prob_space "MX\<lparr>measure := distribution X\<rparr>"
hoelzl@41689
   296
    using MX by (rule distribution_finite_prob_space)
hoelzl@41689
   297
  interpret Y: finite_prob_space "MY\<lparr>measure := distribution Y\<rparr>"
hoelzl@41689
   298
    using MY by (rule distribution_finite_prob_space)
hoelzl@41689
   299
  interpret XY: pair_finite_prob_space "MX\<lparr>measure := distribution X\<rparr>" "MY\<lparr>measure := distribution Y\<rparr>" by default
hoelzl@41689
   300
  interpret P: finite_prob_space "XY.P\<lparr>measure := joint_distribution X Y\<rparr>"
hoelzl@41689
   301
    using assms by (auto intro!: joint_distribution_finite_prob_space)
hoelzl@36080
   302
hoelzl@41689
   303
  (* The same structure viewed once as a finite measure space (needed by
     KL_divergence_eq_finite) and once as a finite probability space
     (needed by KL_divergence_positive_finite). *)
  have P_ms: "finite_measure_space (XY.P\<lparr>measure :=joint_distribution X Y\<rparr>)" by default
hoelzl@41689
   304
  have P_ps: "finite_prob_space (XY.P\<lparr>measure := joint_distribution X Y\<rparr>)" by default
hoelzl@36624
   305
hoelzl@40859
   306
  show ?sum
hoelzl@38656
   307
    unfolding Let_def mutual_information_def
hoelzl@40859
   308
    by (subst XY.KL_divergence_eq_finite[OF P_ms finite_variables_absolutely_continuous[OF MX MY]])
hoelzl@41689
   309
       (auto simp add: space_pair_measure setsum_cartesian_product' real_of_pextreal_mult[symmetric])
hoelzl@36080
   310
hoelzl@36624
   311
  show ?positive
hoelzl@40859
   312
    using XY.KL_divergence_positive_finite[OF P_ps finite_variables_absolutely_continuous[OF MX MY] b_gt_1]
hoelzl@40859
   313
    unfolding mutual_information_def .
hoelzl@36080
   314
qed
hoelzl@36080
   315
hoelzl@41661
   316
(* Symmetry of mutual information for finite random variables: swapping
   (S, X) with (T, Y) does not change the value.  Both sides are unfolded
   with mutual_information_generic_eq, the joint distribution is commuted
   on singletons, and the sums are reindexed along the pair swap. *)
lemma (in information_space) mutual_information_commute:
hoelzl@41661
   317
  assumes X: "finite_random_variable S X" and Y: "finite_random_variable T Y"
hoelzl@41661
   318
  shows "mutual_information b S T X Y = mutual_information b T S Y X"
hoelzl@41661
   319
  unfolding mutual_information_generic_eq[OF X Y] mutual_information_generic_eq[OF Y X]
hoelzl@41661
   320
  unfolding joint_distribution_commute_singleton[of X Y]
hoelzl@41661
   321
  by (auto simp add: ac_simps intro!: setsum_reindex_cong[OF swap_inj_on])
hoelzl@41661
   322
hoelzl@41661
   323
(* Symmetry of the abbreviated mutual information for simple functions X and
   Y on M.  Reduces to mutual_information_commute through
   simple_function_imp_finite_random_variable. *)
lemma (in information_space) mutual_information_commute_simple:
hoelzl@41689
   324
  assumes X: "simple_function M X" and Y: "simple_function M Y"
hoelzl@41661
   325
  shows "\<I>(X;Y) = \<I>(Y;X)"
hoelzl@41661
   326
  by (intro X Y simple_function_imp_finite_random_variable mutual_information_commute)
hoelzl@41661
   327
hoelzl@40859
   328
(* Sum formula for the abbreviated mutual information of two simple
   functions: the sum ranges over the product of the images of space M under
   X and Y, and the joint distribution is written out as the distribution of
   the pairing function.  Follows from mutual_information_generic_eq by
   simp. *)
lemma (in information_space) mutual_information_eq:
hoelzl@41689
   329
  assumes "simple_function M X" "simple_function M Y"
hoelzl@40859
   330
  shows "\<I>(X;Y) = (\<Sum> (x,y) \<in> X ` space M \<times> Y ` space M.
hoelzl@38656
   331
    real (distribution (\<lambda>x. (X x, Y x)) {(x,y)}) * log b (real (distribution (\<lambda>x. (X x, Y x)) {(x,y)}) /
hoelzl@38656
   332
                                                   (real (distribution X {x}) * real (distribution Y {y}))))"
hoelzl@40859
   333
  using assms by (simp add: mutual_information_generic_eq)
hoelzl@36080
   334
hoelzl@40859
   335
(* Congruence for mutual_information with fixed structures MX and MY: if X
   agrees with X' and Y agrees with Y' on every point of space M, then the
   mutual information is unchanged.  Proved by unfolding the definition and
   using distribution_cong as a congruence rule. *)
lemma (in information_space) mutual_information_generic_cong:
hoelzl@39097
   336
  assumes X: "\<And>x. x \<in> space M \<Longrightarrow> X x = X' x"
hoelzl@39097
   337
  assumes Y: "\<And>x. x \<in> space M \<Longrightarrow> Y x = Y' x"
hoelzl@40859
   338
  shows "mutual_information b MX MY X Y = mutual_information b MX MY X' Y'"
hoelzl@40859
   339
  unfolding mutual_information_def using X Y
hoelzl@40859
   340
  by (simp cong: distribution_cong)
hoelzl@39097
   341
hoelzl@40859
   342
(* Congruence for the abbreviated mutual information: if X agrees with X'
   and Y agrees with Y' on space M, the values coincide.  Compared with
   mutual_information_generic_cong, image_cong is used in addition because
   the abbreviation mentions the images of space M. *)
lemma (in information_space) mutual_information_cong:
hoelzl@40859
   343
  assumes X: "\<And>x. x \<in> space M \<Longrightarrow> X x = X' x"
hoelzl@40859
   344
  assumes Y: "\<And>x. x \<in> space M \<Longrightarrow> Y x = Y' x"
hoelzl@40859
   345
  shows "\<I>(X; Y) = \<I>(X'; Y')"
hoelzl@40859
   346
  unfolding mutual_information_def using X Y
hoelzl@40859
   347
  by (simp cong: distribution_cong image_cong)
hoelzl@40859
   348
hoelzl@40859
   349
(* Non-negativity of the abbreviated mutual information for simple functions
   X and Y on M; an instance of mutual_information_positive_generic. *)
lemma (in information_space) mutual_information_positive:
hoelzl@41689
   350
  assumes "simple_function M X" "simple_function M Y"
hoelzl@40859
   351
  shows "0 \<le> \<I>(X;Y)"
hoelzl@40859
   352
  using assms by (simp add: mutual_information_positive_generic)
hoelzl@36080
   353
hoelzl@39097
   354
subsection {* Entropy *}
hoelzl@39097
   355
hoelzl@40859
   356
(* Notation for entropy with the locale's base b over the structure whose
   space is the image of space M under X, whose sets are the power set of
   that image, and whose measure is distribution X. *)
abbreviation (in information_space)
hoelzl@40859
   357
  entropy_Pow ("\<H>'(_')") where
hoelzl@41689
   358
  "\<H>(X) \<equiv> entropy b \<lparr> space = X`space M, sets = Pow (X`space M), measure = distribution X \<rparr> X"
hoelzl@36080
   359
hoelzl@40859
   360
(* Sum formula for the entropy of a finite random variable X into MX:
     entropy b MX X = - (SUM x:space MX. p x * log b (p x))
   with p x = real (distribution X {x}).
   Proof: unfold entropy into the mutual information of X with itself, use
   the generic sum formula, and simplify each summand with the local fact
   remove_XX, which covers both the diagonal (x = y) and the off-diagonal
   case. *)
lemma (in information_space) entropy_generic_eq:
hoelzl@40859
   361
  assumes MX: "finite_random_variable MX X"
hoelzl@39097
   362
  shows "entropy b MX X = -(\<Sum> x \<in> space MX. real (distribution X {x}) * log b (real (distribution X {x})))"
hoelzl@39097
   363
proof -
hoelzl@41689
   364
  interpret MX: finite_prob_space "MX\<lparr>measure := distribution X\<rparr>"
hoelzl@41689
   365
    using MX by (rule distribution_finite_prob_space)
hoelzl@39097
   366
  (* Shorthands for the marginal and the joint point probabilities. *)
  let "?X x" = "real (distribution X {x})"
hoelzl@39097
   367
  let "?XX x y" = "real (joint_distribution X X {(x, y)})"
hoelzl@39097
   368
  { fix x y
hoelzl@39097
   369
    (* The preimage of (x, y) under the pairing of X with itself is the
       preimage of x under X when x = y, and empty otherwise. *)
    have "(\<lambda>x. (X x, X x)) -` {(x, y)} = (if x = y then X -` {x} else {})" by auto
hoelzl@39097
   370
    then have "?XX x y * log b (?XX x y / (?X x * ?X y)) =
hoelzl@39097
   371
        (if x = y then - ?X y * log b (?X y) else 0)"
hoelzl@40859
   372
      unfolding distribution_def by (auto simp: log_simps zero_less_mult_iff) }
hoelzl@39097
   373
  note remove_XX = this
hoelzl@39097
   374
  show ?thesis
hoelzl@39097
   375
    unfolding entropy_def mutual_information_generic_eq[OF MX MX]
hoelzl@39097
   376
    unfolding setsum_cartesian_product[symmetric] setsum_negf[symmetric] remove_XX
hoelzl@41689
   377
    using MX.finite_space by (auto simp: setsum_cases)
hoelzl@39097
   378
qed
hoelzl@36624
   379
hoelzl@40859
   380
(* Sum formula for the abbreviated entropy of a simple function X: the sum
   ranges over the image of space M under X.  Follows from
   entropy_generic_eq by simp. *)
lemma (in information_space) entropy_eq:
hoelzl@41689
   381
  assumes "simple_function M X"
hoelzl@40859
   382
  shows "\<H>(X) = -(\<Sum> x \<in> X ` space M. real (distribution X {x}) * log b (real (distribution X {x})))"
hoelzl@40859
   383
  using assms by (simp add: entropy_generic_eq)
hoelzl@36080
   384
hoelzl@40859
   385
(* Non-negativity of the entropy of a simple function X.  After unfolding
   entropy_def this is mutual_information_positive applied to X and X. *)
lemma (in information_space) entropy_positive:
hoelzl@41689
   386
  "simple_function M X \<Longrightarrow> 0 \<le> \<H>(X)"
hoelzl@40859
   387
  unfolding entropy_def by (simp add: mutual_information_positive)
hoelzl@36080
   388
hoelzl@40859
   389
(* If a simple function X takes some value x (in its image of space M) with
   distribution X {x} = 1, then the entropy of X is 0.
   Proof: the complement of {x} inside the image has distribution 0, so by
   monotonicity every other point y has distribution 0.  Hence each point
   probability is 1 (for x) or 0 (otherwise), and every summand of the
   entropy sum formula vanishes (fact y below). *)
lemma (in information_space) entropy_certainty_eq_0:
hoelzl@41689
   390
  assumes "simple_function M X" and "x \<in> X ` space M" and "distribution X {x} = 1"
hoelzl@39097
   391
  shows "\<H>(X) = 0"
hoelzl@39097
   392
proof -
hoelzl@41689
   393
  let ?X = "\<lparr> space = X ` space M, sets = Pow (X ` space M), measure = distribution X\<rparr>"
hoelzl@41689
   394
  note simple_function_imp_finite_random_variable[OF `simple_function M X`]
hoelzl@41689
   395
  from distribution_finite_prob_space[OF this, of "\<lparr> measure = distribution X \<rparr>"]
hoelzl@41689
   396
  interpret X: finite_prob_space ?X by simp
hoelzl@39097
   397
  have "distribution X (X ` space M - {x}) = distribution X (X ` space M) - distribution X {x}"
hoelzl@39097
   398
    using X.measure_compl[of "{x}"] assms by auto
hoelzl@39097
   399
  also have "\<dots> = 0" using X.prob_space assms by auto
hoelzl@39097
   400
  finally have X0: "distribution X (X ` space M - {x}) = 0" by auto
hoelzl@39097
   401
  { fix y assume asm: "y \<noteq> x" "y \<in> X ` space M"
hoelzl@39097
   402
    hence "{y} \<subseteq> X ` space M - {x}" by auto
hoelzl@39097
   403
    from X.measure_mono[OF this] X0 asm
hoelzl@39097
   404
    have "distribution X {y} = 0" by auto }
hoelzl@39097
   405
  hence fi: "\<And> y. y \<in> X ` space M \<Longrightarrow> real (distribution X {y}) = (if x = y then 1 else 0)"
hoelzl@39097
   406
    using assms by auto
hoelzl@39097
   407
  have y: "\<And>y. (if x = y then 1 else 0) * log b (if x = y then 1 else 0) = 0" by simp
hoelzl@41689
   408
  show ?thesis unfolding entropy_eq[OF `simple_function M X`] by (auto simp: y fi)
hoelzl@39097
   409
qed
hoelzl@39097
   410
hoelzl@40859
   411
(* Upper bound on the entropy of a simple function X: it is at most log b of
   the number of points in the image of space M whose distribution is
   non-zero.
   Proof (calculation chain): write the entropy as SUM p x * log b (1 / p x),
   bound it by log b (SUM p x * (1 / p x)) using log_setsum', observe that
   each summand is 1 when the point has non-zero distribution and 0
   otherwise, and convert the resulting sum into a cardinality. *)
lemma (in information_space) entropy_le_card_not_0:
hoelzl@41689
   412
  assumes "simple_function M X"
hoelzl@40859
   413
  shows "\<H>(X) \<le> log b (real (card (X ` space M \<inter> {x . distribution X {x} \<noteq> 0})))"
hoelzl@39097
   414
proof -
hoelzl@39097
   415
  (* ?d is the distribution of a point, ?p its real value. *)
  let "?d x" = "distribution X {x}"
hoelzl@39097
   416
  let "?p x" = "real (?d x)"
hoelzl@39097
   417
  have "\<H>(X) = (\<Sum>x\<in>X`space M. ?p x * log b (1 / ?p x))"
hoelzl@41689
   418
    by (auto intro!: setsum_cong simp: entropy_eq[OF `simple_function M X`] setsum_negf[symmetric] log_simps not_less)
hoelzl@39097
   419
  also have "\<dots> \<le> log b (\<Sum>x\<in>X`space M. ?p x * (1 / ?p x))"
hoelzl@39097
   420
    apply (rule log_setsum')
hoelzl@41689
   421
    using not_empty b_gt_1 `simple_function M X` sum_over_space_real_distribution
hoelzl@40859
   422
    by (auto simp: simple_function_def)
hoelzl@39097
   423
  also have "\<dots> = log b (\<Sum>x\<in>X`space M. if ?d x \<noteq> 0 then 1 else 0)"
hoelzl@41689
   424
    using distribution_finite[OF `simple_function M X`[THEN simple_function_imp_random_variable], simplified]
hoelzl@41023
   425
    by (intro arg_cong[where f="\<lambda>X. log b X"] setsum_cong) (auto simp: real_of_pextreal_eq_0)
hoelzl@39097
   426
  finally show ?thesis
hoelzl@41689
   427
    using `simple_function M X` by (auto simp: setsum_cases real_eq_of_nat simple_function_def)
hoelzl@39097
   428
qed
hoelzl@39097
   429
hoelzl@40859
   430
(* If a simple function X assigns the same distribution to every point of
   its image of space M, then its entropy equals log b of the cardinality of
   that image.
   Proof: X.uniform_prob gives each point the probability 1 / card; this is
   substituted into the entropy sum formula and simplified with log_simps. *)
lemma (in information_space) entropy_uniform_max:
hoelzl@41689
   431
  assumes "simple_function M X"
hoelzl@39097
   432
  assumes "\<And>x y. \<lbrakk> x \<in> X ` space M ; y \<in> X ` space M \<rbrakk> \<Longrightarrow> distribution X {x} = distribution X {y}"
hoelzl@39097
   433
  shows "\<H>(X) = log b (real (card (X ` space M)))"
hoelzl@39097
   434
proof -
hoelzl@41689
   435
  let ?X = "\<lparr> space = X ` space M, sets = Pow (X ` space M), measure = distribution X\<rparr>"
hoelzl@41689
   436
  note simple_function_imp_finite_random_variable[OF `simple_function M X`]
hoelzl@41689
   437
  from distribution_finite_prob_space[OF this, of "\<lparr> measure = distribution X \<rparr>"]
hoelzl@41689
   438
  interpret X: finite_prob_space ?X by simp
hoelzl@39097
   439
  have card_gt0: "0 < card (X ` space M)" unfolding card_gt_0_iff
hoelzl@41689
   440
    using `simple_function M X` not_empty by (auto simp: simple_function_def)
hoelzl@39097
   441
  { fix x assume "x \<in> X ` space M"
hoelzl@39097
   442
    hence "real (distribution X {x}) = 1 / real (card (X ` space M))"
hoelzl@40859
   443
    proof (rule X.uniform_prob[simplified])
hoelzl@39097
   444
      fix x y assume "x \<in> X`space M" "y \<in> X`space M"
hoelzl@40859
   445
      from assms(2)[OF this] show "real (distribution X {x}) = real (distribution X {y})" by simp
hoelzl@39097
   446
    qed }
hoelzl@39097
   447
  thus ?thesis
hoelzl@40859
   448
    using not_empty X.finite_space b_gt_1 card_gt0
hoelzl@41689
   449
    by (simp add: entropy_eq[OF `simple_function M X`] real_eq_of_nat[symmetric] log_simps)
hoelzl@39097
   450
qed
hoelzl@39097
   451
hoelzl@40859
   452
(* Upper bound on the entropy of a simple function X by log b of the
   cardinality of its image of space M.
   The proof is a case distinction on whether the set of image points with
   non-zero distribution is empty:
     - empty: every point has distribution 0, and the bound reduces to
       log b 1 <= log b (card ...);
     - non-empty: combine entropy_le_card_not_0 with monotonicity of log
       (log_le) and the cardinality of the subset (card_mono). *)
lemma (in information_space) entropy_le_card:
hoelzl@41689
   453
  assumes "simple_function M X"
hoelzl@40859
   454
  shows "\<H>(X) \<le> log b (real (card (X ` space M)))"
hoelzl@39097
   455
proof cases
hoelzl@39097
   456
  assume "X ` space M \<inter> {x. distribution X {x} \<noteq> 0} = {}"
hoelzl@39097
   457
  then have "\<And>x. x\<in>X`space M \<Longrightarrow> distribution X {x} = 0" by auto
hoelzl@39097
   458
  moreover
hoelzl@39097
   459
  have "0 < card (X`space M)"
hoelzl@41689
   460
    using `simple_function M X` not_empty
hoelzl@40859
   461
    by (auto simp: card_gt_0_iff simple_function_def)
hoelzl@39097
   462
  then have "log b 1 \<le> log b (real (card (X`space M)))"
hoelzl@39097
   463
    using b_gt_1 by (intro log_le) auto
hoelzl@40859
   464
  ultimately show ?thesis using assms by (simp add: entropy_eq)
hoelzl@39097
   465
next
hoelzl@39097
   466
  (* The name False here labels the negated case assumption; it is a fact
     name, not the constant False. *)
  assume False: "X ` space M \<inter> {x. distribution X {x} \<noteq> 0} \<noteq> {}"
hoelzl@39097
   467
  have "card (X ` space M \<inter> {x. distribution X {x} \<noteq> 0}) \<le> card (X ` space M)"
hoelzl@40859
   468
    (is "?A \<le> ?B") using assms not_empty by (auto intro!: card_mono simp: simple_function_def)
hoelzl@40859
   469
  note entropy_le_card_not_0[OF assms]
hoelzl@39097
   470
  also have "log b (real ?A) \<le> log b (real ?B)"
hoelzl@40859
   471
    using b_gt_1 False not_empty `?A \<le> ?B` assms
hoelzl@40859
   472
    by (auto intro!: log_le simp: card_gt_0_iff simp: simple_function_def)
hoelzl@39097
   473
  finally show ?thesis .
hoelzl@39097
   474
qed
hoelzl@39097
   475
hoelzl@40859
   476
(* Joint entropy is symmetric: swapping the components of the pair
   (X, Y) does not change its entropy.  The proof reindexes the sum over
   the range of (Y, X) along the injective swap (a, b) \<mapsto> (b, a). *)
lemma (in information_space) entropy_commute:
hoelzl@41689
   477
  assumes "simple_function M X" "simple_function M Y"
hoelzl@40859
   478
  shows "\<H>(\<lambda>x. (X x, Y x)) = \<H>(\<lambda>x. (Y x, X x))"
hoelzl@39097
   479
proof -
hoelzl@41689
   480
  (* Both pairings are simple functions, so entropy_eq applies to each. *)
  have sf: "simple_function M (\<lambda>x. (X x, Y x))" "simple_function M (\<lambda>x. (Y x, X x))"
hoelzl@40859
   481
    using assms by (auto intro: simple_function_Pair)
hoelzl@39097
   482
  (* The range of (Y, X) is the swapped image of the range of (X, Y). *)
  have *: "(\<lambda>x. (Y x, X x))`space M = (\<lambda>(a,b). (b,a))`(\<lambda>x. (X x, Y x))`space M"
hoelzl@39097
   483
    by auto
hoelzl@39097
   484
  have inj: "\<And>X. inj_on (\<lambda>(a,b). (b,a)) X"
hoelzl@39097
   485
    by (auto intro!: inj_onI)
hoelzl@39097
   486
  show ?thesis
hoelzl@40859
   487
    unfolding sf[THEN entropy_eq] unfolding * setsum_reindex[OF inj]
hoelzl@39097
   488
    by (simp add: joint_distribution_commute[of Y X] split_beta)
hoelzl@39097
   489
qed
hoelzl@39097
   490
hoelzl@40859
   491
(* Joint entropy of (X, Y) written as a double sum over the full product
   X ` space M \<times> Y ` space M instead of over the range of the pair.
   Pairs outside the range of (X, Y) have joint distribution 0, so
   extending the index set adds only zero summands. *)
lemma (in information_space) entropy_eq_cartesian_product:
hoelzl@41689
   492
  assumes "simple_function M X" "simple_function M Y"
hoelzl@40859
   493
  shows "\<H>(\<lambda>x. (X x, Y x)) = -(\<Sum>x\<in>X`space M. \<Sum>y\<in>Y`space M.
hoelzl@39097
   494
    real (joint_distribution X Y {(x,y)}) *
hoelzl@39097
   495
    log b (real (joint_distribution X Y {(x,y)})))"
hoelzl@39097
   496
proof -
hoelzl@41689
   497
  have sf: "simple_function M (\<lambda>x. (X x, Y x))"
hoelzl@40859
   498
    using assms by (auto intro: simple_function_Pair)
hoelzl@39097
   499
  (* A point that is not attained by (X, Y) has empty preimage, hence
     joint distribution 0. *)
  { fix x assume "x\<notin>(\<lambda>x. (X x, Y x))`space M"
hoelzl@39097
   500
    then have "(\<lambda>x. (X x, Y x)) -` {x} \<inter> space M = {}" by auto
hoelzl@39097
   501
    then have "joint_distribution X Y {x} = 0"
hoelzl@39097
   502
      unfolding distribution_def by auto }
hoelzl@40859
   503
  then show ?thesis using sf assms
hoelzl@40859
   504
    unfolding entropy_eq[OF sf] neg_equal_iff_equal setsum_cartesian_product
hoelzl@40859
   505
    by (auto intro!: setsum_mono_zero_cong_left simp: simple_function_def)
hoelzl@39097
   506
qed
hoelzl@39097
   507
hoelzl@39097
   508
subsection {* Conditional Mutual Information *}
hoelzl@39097
   509
hoelzl@36080
   510
(* Conditional mutual information of X and Y given Z, defined as the
   mutual information between X and the pair (Y, Z) (over the pair
   measure MY \<Otimes>\<^isub>M MZ) minus the mutual information between X and Z. *)
definition (in prob_space)
hoelzl@41689
   511
  "conditional_mutual_information b MX MY MZ X Y Z \<equiv>
hoelzl@41689
   512
    mutual_information b MX (MY \<Otimes>\<^isub>M MZ) X (\<lambda>x. (Y x, Z x)) -
hoelzl@41689
   513
    mutual_information b MX MZ X Z"
hoelzl@36080
   514
hoelzl@40859
   515
(* Notation \<I>(X ; Y | Z): conditional_mutual_information with base b,
   where each of X, Y, Z is given the measure space whose carrier is its
   range on space M, whose sets are the full power set of that range, and
   whose measure is the corresponding distribution. *)
abbreviation (in information_space)
hoelzl@40859
   516
  conditional_mutual_information_Pow ("\<I>'( _ ; _ | _ ')") where
hoelzl@36624
   517
  "\<I>(X ; Y | Z) \<equiv> conditional_mutual_information b
hoelzl@41689
   518
    \<lparr> space = X`space M, sets = Pow (X`space M), measure = distribution X \<rparr>
hoelzl@41689
   519
    \<lparr> space = Y`space M, sets = Pow (Y`space M), measure = distribution Y \<rparr>
hoelzl@41689
   520
    \<lparr> space = Z`space M, sets = Pow (Z`space M), measure = distribution Z \<rparr>
hoelzl@36080
   521
    X Y Z"
hoelzl@36080
   522
hoelzl@40859
   523
(* Explicit sum formula for conditional mutual information over finite
   random variables: a sum over space MX \<times> space MY \<times> space MZ of
   p(x,y,z) * log b (p(x,y,z) / (p(x,z) * (p(y,z) / p(z)))),
   where p denotes the respective (joint) distributions.
   The calculation goes from this sum back to the definition:
   split the log into a difference, split the sum, marginalise Y out of
   the second sum, then recognise two instances of
   mutual_information_generic_eq. *)
lemma (in information_space) conditional_mutual_information_generic_eq:
hoelzl@40859
   524
  assumes MX: "finite_random_variable MX X"
hoelzl@40859
   525
    and MY: "finite_random_variable MY Y"
hoelzl@40859
   526
    and MZ: "finite_random_variable MZ Z"
hoelzl@40859
   527
  shows "conditional_mutual_information b MX MY MZ X Y Z = (\<Sum>(x, y, z) \<in> space MX \<times> space MY \<times> space MZ.
hoelzl@38656
   528
             real (distribution (\<lambda>x. (X x, Y x, Z x)) {(x, y, z)}) *
hoelzl@38656
   529
             log b (real (distribution (\<lambda>x. (X x, Y x, Z x)) {(x, y, z)}) /
hoelzl@38656
   530
    (real (joint_distribution X Z {(x, z)}) * real (joint_distribution Y Z {(y,z)} / distribution Z {z}))))"
hoelzl@40859
   531
  (is "_ = (\<Sum>(x, y, z)\<in>?S. ?XYZ x y z * log b (?XYZ x y z / (?XZ x z * ?YZdZ y z)))")
hoelzl@40859
   532
proof -
hoelzl@40859
   533
  let ?YZ = "\<lambda>y z. real (joint_distribution Y Z {(y, z)})"
hoelzl@40859
   534
  let ?X = "\<lambda>x. real (distribution X {x})"
hoelzl@40859
   535
  let ?Z = "\<lambda>z. real (distribution Z {z})"
hoelzl@40859
   536
hoelzl@40859
   537
  txt {* This proof is actually quite easy, however we need to show that the
hoelzl@40859
   538
    distributions are finite and the joint distributions are zero when one of
hoelzl@40859
   539
    the variables' distributions is also zero. *}
hoelzl@40859
   540
hoelzl@40859
   541
  note finite_var = MX MY MZ
hoelzl@40859
   542
  note random_var = finite_var[THEN finite_random_variableD]
hoelzl@40859
   543
hoelzl@41689
   544
  note space_simps = space_pair_measure space_sigma algebra.simps
hoelzl@40859
   545
hoelzl@40859
   546
  (* Finite-random-variable facts for the pairings used below. *)
  note YZ = finite_random_variable_pairI[OF finite_var(2,3)]
hoelzl@40859
   547
  note XZ = finite_random_variable_pairI[OF finite_var(1,3)]
hoelzl@40859
   548
  note ZX = finite_random_variable_pairI[OF finite_var(3,1)]
hoelzl@40859
   549
  note YZX = finite_random_variable_pairI[OF finite_var(2) ZX]
hoelzl@40859
   550
  note order1 =
hoelzl@40859
   551
    finite_distribution_order(5,6)[OF finite_var(1) YZ, simplified space_simps]
hoelzl@40859
   552
    finite_distribution_order(5,6)[OF finite_var(1,3), simplified space_simps]
hoelzl@40859
   553
hoelzl@40859
   554
  (* Register the finiteness facts as local simp rules. *)
  note finite = finite_var(1) YZ finite_var(3) XZ YZX
hoelzl@40859
   555
  note finite[THEN finite_distribution_finite, simplified space_simps, simp]
hoelzl@40859
   556
hoelzl@40859
   557
  (* If the (X, Z) joint distribution vanishes at (x, z), so does the
     (X, (Y, Z)) joint distribution at (x, y, z). *)
  have order2: "\<And>x y z. \<lbrakk>x \<in> space MX; y \<in> space MY; z \<in> space MZ; joint_distribution X Z {(x, z)} = 0\<rbrakk>
hoelzl@40859
   558
          \<Longrightarrow> joint_distribution X (\<lambda>x. (Y x, Z x)) {(x, y, z)} = 0"
hoelzl@40859
   559
    unfolding joint_distribution_commute_singleton[of X]
hoelzl@40859
   560
    unfolding joint_distribution_assoc_singleton[symmetric]
hoelzl@40859
   561
    using finite_distribution_order(6)[OF finite_var(2) ZX]
hoelzl@40859
   562
    by (auto simp: space_simps)
hoelzl@36624
   563
hoelzl@40859
   564
  (* Step 1: rewrite each summand's log of a quotient as a difference of logs. *)
  have "(\<Sum>(x, y, z)\<in>?S. ?XYZ x y z * log b (?XYZ x y z / (?XZ x z * ?YZdZ y z))) =
hoelzl@40859
   565
    (\<Sum>(x, y, z)\<in>?S. ?XYZ x y z * (log b (?XYZ x y z / (?X x * ?YZ y z)) - log b (?XZ x z / (?X x * ?Z z))))"
hoelzl@40859
   566
    (is "(\<Sum>(x, y, z)\<in>?S. ?L x y z) = (\<Sum>(x, y, z)\<in>?S. ?R x y z)")
hoelzl@40859
   567
  proof (safe intro!: setsum_cong)
hoelzl@40859
   568
    fix x y z assume space: "x \<in> space MX" "y \<in> space MY" "z \<in> space MZ"
hoelzl@40859
   569
    then have *: "?XYZ x y z / (?XZ x z * ?YZdZ y z) =
hoelzl@40859
   570
      (?XYZ x y z / (?X x * ?YZ y z)) / (?XZ x z / (?X x * ?Z z))"
hoelzl@40859
   571
      using order1(3)
hoelzl@41023
   572
      by (auto simp: real_of_pextreal_mult[symmetric] real_of_pextreal_eq_0)
hoelzl@40859
   573
    show "?L x y z = ?R x y z"
hoelzl@40859
   574
    proof cases
hoelzl@40859
   575
      (* Only the non-zero case needs log_divide; the zero case is closed by simp. *)
      assume "?XYZ x y z \<noteq> 0"
hoelzl@40859
   576
      with space b_gt_1 order1 order2 show ?thesis unfolding *
hoelzl@40859
   577
        by (subst log_divide)
hoelzl@41023
   578
           (auto simp: zero_less_divide_iff zero_less_real_of_pextreal
hoelzl@41023
   579
                       real_of_pextreal_eq_0 zero_less_mult_iff)
hoelzl@40859
   580
    qed simp
hoelzl@40859
   581
  qed
hoelzl@40859
   582
  (* Step 2: distribute the sum over the difference. *)
  also have "\<dots> = (\<Sum>(x, y, z)\<in>?S. ?XYZ x y z * log b (?XYZ x y z / (?X x * ?YZ y z))) -
hoelzl@40859
   583
                  (\<Sum>(x, y, z)\<in>?S. ?XYZ x y z * log b (?XZ x z / (?X x * ?Z z)))"
hoelzl@40859
   584
    by (auto simp add: setsum_subtractf[symmetric] field_simps intro!: setsum_cong)
hoelzl@40859
   585
  (* Step 3: sum y out of the second term, leaving a sum over (x, z). *)
  also have "(\<Sum>(x, y, z)\<in>?S. ?XYZ x y z * log b (?XZ x z / (?X x * ?Z z))) =
hoelzl@40859
   586
             (\<Sum>(x, z)\<in>space MX \<times> space MZ. ?XZ x z * log b (?XZ x z / (?X x * ?Z z)))"
hoelzl@40859
   587
    unfolding setsum_cartesian_product[symmetric] setsum_commute[of _ _ "space MY"]
hoelzl@40859
   588
              setsum_left_distrib[symmetric]
hoelzl@40859
   589
    unfolding joint_distribution_commute_singleton[of X]
hoelzl@40859
   590
    unfolding joint_distribution_assoc_singleton[symmetric]
hoelzl@40859
   591
    using setsum_real_joint_distribution_singleton[OF finite_var(2) ZX, unfolded space_simps]
hoelzl@40859
   592
    by (intro setsum_cong refl) simp
hoelzl@40859
   593
  (* Step 4: both sums are mutual-information sums, so their difference is
     the definition of conditional mutual information. *)
  also have "(\<Sum>(x, y, z)\<in>?S. ?XYZ x y z * log b (?XYZ x y z / (?X x * ?YZ y z))) -
hoelzl@40859
   594
             (\<Sum>(x, z)\<in>space MX \<times> space MZ. ?XZ x z * log b (?XZ x z / (?X x * ?Z z))) =
hoelzl@40859
   595
             conditional_mutual_information b MX MY MZ X Y Z"
hoelzl@40859
   596
    unfolding conditional_mutual_information_def
hoelzl@40859
   597
    unfolding mutual_information_generic_eq[OF finite_var(1,3)]
hoelzl@40859
   598
    unfolding mutual_information_generic_eq[OF finite_var(1) YZ]
hoelzl@41689
   599
    by (simp add: space_sigma space_pair_measure setsum_cartesian_product')
hoelzl@40859
   600
  finally show ?thesis by simp
hoelzl@40859
   601
qed
hoelzl@40859
   602
hoelzl@40859
   603
(* Sum formula for \<I>(X;Y|Z) on simple functions: the instance of
   conditional_mutual_information_generic_eq where the index sets are the
   ranges X`space M, Y`space M and Z`space M. *)
lemma (in information_space) conditional_mutual_information_eq:
hoelzl@41689
   604
  assumes "simple_function M X" "simple_function M Y" "simple_function M Z"
hoelzl@40859
   605
  shows "\<I>(X;Y|Z) = (\<Sum>(x, y, z) \<in> X`space M \<times> Y`space M \<times> Z`space M.
hoelzl@40859
   606
             real (distribution (\<lambda>x. (X x, Y x, Z x)) {(x, y, z)}) *
hoelzl@40859
   607
             log b (real (distribution (\<lambda>x. (X x, Y x, Z x)) {(x, y, z)}) /
hoelzl@40859
   608
    (real (joint_distribution X Z {(x, z)}) * real (joint_distribution Y Z {(y,z)} / distribution Z {z}))))"
hoelzl@40859
   609
  (* Each simple-function assumption is turned into a finite_random_variable fact. *)
  using conditional_mutual_information_generic_eq[OF assms[THEN simple_function_imp_finite_random_variable]]
hoelzl@40859
   610
  by simp
hoelzl@40859
   611
hoelzl@40859
   612
(* Mutual information equals conditional mutual information given the
   constant unit-valued function \<lambda>x. (). *)
lemma (in information_space) conditional_mutual_information_eq_mutual_information:
hoelzl@41689
   613
  assumes X: "simple_function M X" and Y: "simple_function M Y"
hoelzl@40859
   614
  shows "\<I>(X ; Y) = \<I>(X ; Y | (\<lambda>x. ()))"
hoelzl@36624
   615
proof -
hoelzl@36624
   616
  (* The range of the constant function is the singleton {()}; this needs
     space M to be non-empty. *)
  have [simp]: "(\<lambda>x. ()) ` space M = {()}" using not_empty by auto
hoelzl@41689
   617
  have C: "simple_function M (\<lambda>x. ())" by auto
hoelzl@36624
   618
  show ?thesis
hoelzl@40859
   619
    unfolding conditional_mutual_information_eq[OF X Y C]
hoelzl@40859
   620
    unfolding mutual_information_eq[OF X Y]
hoelzl@36624
   621
    by (simp add: setsum_cartesian_product' distribution_remove_const)
hoelzl@36624
   622
qed
hoelzl@36624
   623
hoelzl@40859
   624
(* The distribution of the constant unit-valued function assigns 1 to {()}.
   Declared [simp]. *)
lemma (in prob_space) distribution_unit[simp]: "distribution (\<lambda>x. ()) {()} = 1"
hoelzl@40859
   625
  unfolding distribution_def using measure_space_1 by auto
hoelzl@40859
   626
hoelzl@40859
   627
(* Pairing X with the constant unit-valued function does not change the
   distribution on singletons.  Declared [simp]. *)
lemma (in prob_space) joint_distribution_unit[simp]: "distribution (\<lambda>x. (X x, ())) {(a, ())} = distribution X {a}"
hoelzl@40859
   628
  unfolding distribution_def by (auto intro!: arg_cong[where f=\<mu>])
hoelzl@40859
   629
hoelzl@40859
   630
(* The singleton distributions of a finite random variable sum to 1 over
   space MX.  Obtained from setsum_joint_distribution instantiated with
   the constant unit-valued function on the power-set algebra over UNIV. *)
lemma (in prob_space) setsum_distribution:
hoelzl@40859
   631
  assumes X: "finite_random_variable MX X" shows "(\<Sum>a\<in>space MX. distribution X {a}) = 1"
hoelzl@40859
   632
  using setsum_joint_distribution[OF assms, of "\<lparr> space = UNIV, sets = Pow UNIV \<rparr>" "\<lambda>x. ()" "{()}"]
hoelzl@41689
   633
  using sigma_algebra_Pow[of "UNIV::unit set" "()"] by simp
hoelzl@40859
   634
hoelzl@40859
   635
(* Real-valued variant of setsum_distribution: the real values of the
   singleton distributions sum to 1 over space MX.  The auxiliary unit
   measure space is given measure = undefined in the instantiation. *)
lemma (in prob_space) setsum_real_distribution:
hoelzl@41689
   636
  fixes MX :: "('c, 'd) measure_space_scheme"
hoelzl@40859
   637
  assumes X: "finite_random_variable MX X" shows "(\<Sum>a\<in>space MX. real (distribution X {a})) = 1"
hoelzl@41689
   638
  using setsum_real_joint_distribution[OF assms, of "\<lparr> space = UNIV, sets = Pow UNIV, measure = undefined \<rparr>" "\<lambda>x. ()" "{()}"]
hoelzl@41689
   639
  using sigma_algebra_Pow[of "UNIV::unit set" "\<lparr> measure = undefined \<rparr>"] by simp
hoelzl@40859
   640
hoelzl@40859
   641
(* Conditional mutual information of finite random variables is
   non-negative.
   Empty index set: the sum formula is an empty sum.
   Otherwise: the negated sum formula is bounded above, via rule
   log_setsum_divide, by log b of a sum that collapses to
   (\<Sum>z\<in>space MZ. ?dZ {z}) = 1, whose logarithm is 0. *)
lemma (in information_space) conditional_mutual_information_generic_positive:
hoelzl@40859
   642
  assumes "finite_random_variable MX X" and "finite_random_variable MY Y" and "finite_random_variable MZ Z"
hoelzl@40859
   643
  shows "0 \<le> conditional_mutual_information b MX MY MZ X Y Z"
hoelzl@40859
   644
proof (cases "space MX \<times> space MY \<times> space MZ = {}")
hoelzl@40859
   645
  case True show ?thesis
hoelzl@40859
   646
    unfolding conditional_mutual_information_generic_eq[OF assms] True
hoelzl@40859
   647
    by simp
hoelzl@40859
   648
next
hoelzl@40859
   649
  case False
hoelzl@38656
   650
  (* Abbreviations for the real-valued (joint) distributions on a set A. *)
  let "?dXYZ A" = "real (distribution (\<lambda>x. (X x, Y x, Z x)) A)"
hoelzl@38656
   651
  let "?dXZ A" = "real (joint_distribution X Z A)"
hoelzl@38656
   652
  let "?dYZ A" = "real (joint_distribution Y Z A)"
hoelzl@38656
   653
  let "?dX A" = "real (distribution X A)"
hoelzl@38656
   654
  let "?dZ A" = "real (distribution Z A)"
hoelzl@40859
   655
  let ?M = "space MX \<times> space MY \<times> space MZ"
hoelzl@36624
   656
nipkow@39302
   657
  (* Local fact named split_beta: turns tuple-pattern lambdas into fst/snd
     form so the sums match the shape of log_setsum_divide's premises. *)
  have split_beta: "\<And>f. split f = (\<lambda>x. f (fst x) (snd x))" by (simp add: fun_eq_iff)
hoelzl@36080
   658
hoelzl@41689
   659
  note space_simps = space_pair_measure space_sigma algebra.simps
hoelzl@40859
   660
hoelzl@40859
   661
  (* Finite-random-variable facts for the pairings used below
     (YZ is bound once; a duplicate binding was removed). *)
  note finite_var = assms
hoelzl@40859
   662
  note YZ = finite_random_variable_pairI[OF finite_var(2,3)]
hoelzl@40859
   663
  note XZ = finite_random_variable_pairI[OF finite_var(1,3)]
hoelzl@40859
   664
  note ZX = finite_random_variable_pairI[OF finite_var(3,1)]
hoelzl@40859
   666
  note XYZ = finite_random_variable_pairI[OF finite_var(1) YZ]
hoelzl@40859
   667
  note finite = finite_var(3) YZ XZ XYZ
hoelzl@40859
   668
  note finite = finite[THEN finite_distribution_finite, simplified space_simps]
hoelzl@40859
   669
hoelzl@40859
   670
  (* If the (X, Z) joint distribution vanishes at (x, z), so does the
     (X, (Y, Z)) joint distribution at (x, y, z). *)
  have order: "\<And>x y z. \<lbrakk>x \<in> space MX; y \<in> space MY; z \<in> space MZ; joint_distribution X Z {(x, z)} = 0\<rbrakk>
hoelzl@40859
   671
          \<Longrightarrow> joint_distribution X (\<lambda>x. (Y x, Z x)) {(x, y, z)} = 0"
hoelzl@40859
   672
    unfolding joint_distribution_commute_singleton[of X]
hoelzl@40859
   673
    unfolding joint_distribution_assoc_singleton[symmetric]
hoelzl@40859
   674
    using finite_distribution_order(6)[OF finite_var(2) ZX]
hoelzl@40859
   675
    by (auto simp: space_simps)
hoelzl@40859
   676
hoelzl@40859
   677
  note order = order
hoelzl@40859
   678
    finite_distribution_order(5,6)[OF finite_var(1) YZ, simplified space_simps]
hoelzl@40859
   679
    finite_distribution_order(5,6)[OF finite_var(2,3), simplified space_simps]
hoelzl@40859
   680
hoelzl@40859
   681
  (* Start from the negated sum formula, with the quotient regrouped. *)
  have "- conditional_mutual_information b MX MY MZ X Y Z = - (\<Sum>(x, y, z) \<in> ?M. ?dXYZ {(x, y, z)} *
hoelzl@40859
   682
    log b (?dXYZ {(x, y, z)} / (?dXZ {(x, z)} * ?dYZ {(y,z)} / ?dZ {z})))"
hoelzl@40859
   683
    unfolding conditional_mutual_information_generic_eq[OF assms] neg_equal_iff_equal
hoelzl@41023
   684
    by (intro setsum_cong) (auto intro!: arg_cong[where f="log b"] simp: real_of_pextreal_mult[symmetric])
hoelzl@40859
   685
  also have "\<dots> \<le> log b (\<Sum>(x, y, z) \<in> ?M. ?dXZ {(x, z)} * ?dYZ {(y,z)} / ?dZ {z})"
hoelzl@36624
   686
    unfolding split_beta
hoelzl@36624
   687
  proof (rule log_setsum_divide)
hoelzl@40859
   688
    show "?M \<noteq> {}" using False by simp
hoelzl@36624
   689
    show "1 < b" using b_gt_1 .
hoelzl@36080
   690
hoelzl@40859
   691
    show "finite ?M" using assms
hoelzl@40859
   692
      unfolding finite_sigma_algebra_def finite_sigma_algebra_axioms_def by auto
hoelzl@40859
   693
hoelzl@40859
   694
    (* The triple distribution sums to 1 over ?M. *)
    show "(\<Sum>x\<in>?M. ?dXYZ {(fst x, fst (snd x), snd (snd x))}) = 1"
hoelzl@40859
   695
      unfolding setsum_cartesian_product'
hoelzl@40859
   696
      unfolding setsum_commute[of _ "space MY"]
hoelzl@40859
   697
      unfolding setsum_commute[of _ "space MZ"]
hoelzl@41689
   698
      by (simp_all add: space_pair_measure
hoelzl@40859
   699
        setsum_real_joint_distribution_singleton[OF `finite_random_variable MX X` YZ]
hoelzl@40859
   700
        setsum_real_joint_distribution_singleton[OF `finite_random_variable MY Y` finite_var(3)]
hoelzl@40859
   701
        setsum_real_distribution[OF `finite_random_variable MZ Z`])
hoelzl@40859
   702
hoelzl@36624
   703
    fix x assume "x \<in> ?M"
hoelzl@38656
   704
    let ?x = "(fst x, fst (snd x), snd (snd x))"
hoelzl@38656
   705
hoelzl@41023
   706
    show "0 \<le> ?dXYZ {?x}" using real_pextreal_nonneg .
hoelzl@36624
   707
    show "0 \<le> ?dXZ {(fst x, snd (snd x))} * ?dYZ {(fst (snd x), snd (snd x))} / ?dZ {snd (snd x)}"
hoelzl@41023
   708
     by (simp add: real_pextreal_nonneg mult_nonneg_nonneg divide_nonneg_nonneg)
hoelzl@36080
   709
hoelzl@38656
   710
    (* Where the triple distribution is positive, the comparison term is
       positive too (uses the order facts). *)
    assume *: "0 < ?dXYZ {?x}"
hoelzl@40859
   711
    with `x \<in> ?M` show "0 < ?dXZ {(fst x, snd (snd x))} * ?dYZ {(fst (snd x), snd (snd x))} / ?dZ {snd (snd x)}"
hoelzl@40859
   712
      using finite order
hoelzl@40859
   713
      by (cases x)
hoelzl@41023
   714
         (auto simp add: zero_less_real_of_pextreal zero_less_mult_iff zero_less_divide_iff)
hoelzl@40859
   715
  qed
hoelzl@40859
   716
  (* Summing out x and y collapses the bound to the sum of ?dZ over space MZ. *)
  also have "(\<Sum>(x, y, z) \<in> ?M. ?dXZ {(x, z)} * ?dYZ {(y,z)} / ?dZ {z}) = (\<Sum>z\<in>space MZ. ?dZ {z})"
hoelzl@36624
   717
    apply (simp add: setsum_cartesian_product')
hoelzl@36624
   718
    apply (subst setsum_commute)
hoelzl@36624
   719
    apply (subst (2) setsum_commute)
hoelzl@40859
   720
    by (auto simp: setsum_divide_distrib[symmetric] setsum_product[symmetric]
hoelzl@40859
   721
                   setsum_real_joint_distribution_singleton[OF finite_var(1,3)]
hoelzl@40859
   722
                   setsum_real_joint_distribution_singleton[OF finite_var(2,3)]
hoelzl@36624
   723
          intro!: setsum_cong)
hoelzl@40859
   724
  also have "log b (\<Sum>z\<in>space MZ. ?dZ {z}) = 0"
hoelzl@40859
   725
    unfolding setsum_real_distribution[OF finite_var(3)] by simp
hoelzl@40859
   726
  finally show ?thesis by simp
hoelzl@36080
   727
qed
hoelzl@36080
   728
hoelzl@40859
   729
(* Non-negativity of \<I>(X;Y|Z) for simple functions; the instance of
   conditional_mutual_information_generic_positive obtained by turning
   each simple function into a finite random variable. *)
lemma (in information_space) conditional_mutual_information_positive:
hoelzl@41689
   730
  assumes "simple_function M X" and "simple_function M Y" and "simple_function M Z"
hoelzl@40859
   731
  shows "0 \<le> \<I>(X;Y|Z)"
hoelzl@41689
   732
  by (rule conditional_mutual_information_generic_positive[OF assms[THEN simple_function_imp_finite_random_variable]])
hoelzl@40859
   733
hoelzl@39097
   734
subsection {* Conditional Entropy *}
hoelzl@39097
   735
hoelzl@36080
   736
(* Conditional entropy of X given Y, defined as the conditional mutual
   information of X with itself given Y (S is used for both X slots,
   T for Y). *)
definition (in prob_space)
hoelzl@36080
   737
  "conditional_entropy b S T X Y = conditional_mutual_information b S S T X X Y"
hoelzl@36080
   738
hoelzl@40859
   739
(* Notation \<H>(X | Y): conditional_entropy with base b, where X and Y are
   each given the power-set measure space on their range over space M,
   with the corresponding distribution as measure. *)
abbreviation (in information_space)
hoelzl@40859
   740
  conditional_entropy_Pow ("\<H>'(_ | _')") where
hoelzl@36624
   741
  "\<H>(X | Y) \<equiv> conditional_entropy b
hoelzl@41689
   742
    \<lparr> space = X`space M, sets = Pow (X`space M), measure = distribution X \<rparr>
hoelzl@41689
   743
    \<lparr> space = Y`space M, sets = Pow (Y`space M), measure = distribution Y \<rparr> X Y"
hoelzl@36080
   744
hoelzl@40859
   745
(* Conditional entropy of simple functions is non-negative; follows from
   conditional_mutual_information_positive after unfolding the definition. *)
lemma (in information_space) conditional_entropy_positive:
hoelzl@41689
   746
  "simple_function M X \<Longrightarrow> simple_function M Y \<Longrightarrow> 0 \<le> \<H>(X | Y)"
hoelzl@40859
   747
  unfolding conditional_entropy_def by (auto intro!: conditional_mutual_information_positive)
hoelzl@36080
   748
hoelzl@40859
   749
(* Introduction rule: a set known to be empty has measure 0. *)
lemma (in measure_space) empty_measureI: "A = {} \<Longrightarrow> \<mu> A = 0" by simp
hoelzl@40859
   750
hoelzl@40859
   751
(* Explicit sum formula for conditional entropy over finite random
   variables: minus the sum over space MX \<times> space MZ of
   p(x,z) * log b (p(x,z) / p(z)).
   Derived from conditional_mutual_information_generic_eq with Y := X:
   the joint distribution of (X, (X, Z)) at (x, x', z) is that of (X, Z)
   when x' = x and 0 otherwise, so the inner sum over x' collapses.
   NOTE(review): the fixes clause constrains MY, but the statement uses
   MZ and never mentions MY -- confirm whether MZ was intended. *)
lemma (in information_space) conditional_entropy_generic_eq:
hoelzl@41689
   752
  fixes MX :: "('c, 'd) measure_space_scheme" and MY :: "('e, 'f) measure_space_scheme"
hoelzl@40859
   753
  assumes MX: "finite_random_variable MX X"
hoelzl@40859
   754
  assumes MZ: "finite_random_variable MZ Z"
hoelzl@39097
   755
  shows "conditional_entropy b MX MZ X Z =
hoelzl@39097
   756
     - (\<Sum>(x, z)\<in>space MX \<times> space MZ.
hoelzl@39097
   757
         real (joint_distribution X Z {(x, z)}) *
hoelzl@39097
   758
         log b (real (joint_distribution X Z {(x, z)}) / real (distribution Z {z})))"
hoelzl@40859
   759
proof -
hoelzl@40859
   760
  interpret MX: finite_sigma_algebra MX using MX by simp
hoelzl@40859
   761
  interpret MZ: finite_sigma_algebra MZ using MZ by simp
hoelzl@40859
   762
  let "?XXZ x y z" = "joint_distribution X (\<lambda>x. (X x, Z x)) {(x, y, z)}"
hoelzl@40859
   763
  let "?XZ x z" = "joint_distribution X Z {(x, z)}"
hoelzl@40859
   764
  let "?Z z" = "distribution Z {z}"
hoelzl@40859
   765
  let "?f x y z" = "log b (real (?XXZ x y z) / (real (?XZ x z) * real (?XZ y z / ?Z z)))"
hoelzl@40859
   766
  (* On the diagonal the triple distribution equals the (X, Z) one. *)
  { fix x z have "?XXZ x x z = ?XZ x z"
hoelzl@40859
   767
      unfolding distribution_def by (auto intro!: arg_cong[where f=\<mu>]) }
hoelzl@40859
   768
  note this[simp]
hoelzl@41689
   769
  (* Off the diagonal the triple distribution is 0. *)
  { fix x x' :: 'c and z assume "x' \<noteq> x"
hoelzl@40859
   770
    then have "?XXZ x x' z = 0"
hoelzl@40859
   771
      by (auto simp: distribution_def intro!: arg_cong[where f=\<mu>] empty_measureI) }
hoelzl@40859
   772
  note this[simp]
hoelzl@40859
   773
  (* Collapse the inner sum over x' to its single diagonal summand and
     flip the quotient inside the log, which introduces the minus sign. *)
  { fix x x' z assume *: "x \<in> space MX" "z \<in> space MZ"
hoelzl@40859
   774
    then have "(\<Sum>x'\<in>space MX. real (?XXZ x x' z) * ?f x x' z)
hoelzl@40859
   775
      = (\<Sum>x'\<in>space MX. if x = x' then real (?XZ x z) * ?f x x z else 0)"
hoelzl@40859
   776
      by (auto intro!: setsum_cong)
hoelzl@40859
   777
    also have "\<dots> = real (?XZ x z) * ?f x x z"
hoelzl@40859
   778
      using `x \<in> space MX` by (simp add: setsum_cases[OF MX.finite_space])
hoelzl@40859
   779
    also have "\<dots> = real (?XZ x z) * log b (real (?Z z) / real (?XZ x z))"
hoelzl@41023
   780
      by (auto simp: real_of_pextreal_mult[symmetric])
hoelzl@40859
   781
    also have "\<dots> = - real (?XZ x z) * log b (real (?XZ x z) / real (?Z z))"
hoelzl@40859
   782
      using assms[THEN finite_distribution_finite]
hoelzl@40859
   783
      using finite_distribution_order(6)[OF MX MZ]
hoelzl@41023
   784
      by (auto simp: log_simps field_simps zero_less_mult_iff zero_less_real_of_pextreal real_of_pextreal_eq_0)
hoelzl@40859
   785
    finally have "(\<Sum>x'\<in>space MX. real (?XXZ x x' z) * ?f x x' z) =
hoelzl@40859
   786
      - real (?XZ x z) * log b (real (?XZ x z) / real (?Z z))" . }
hoelzl@40859
   787
  note * = this
hoelzl@40859
   788
  show ?thesis
hoelzl@40859
   789
    unfolding conditional_entropy_def
hoelzl@40859
   790
    unfolding conditional_mutual_information_generic_eq[OF MX MX MZ]
hoelzl@40859
   791
    by (auto simp: setsum_cartesian_product' setsum_negf[symmetric]
hoelzl@41023
   792
                   setsum_commute[of _ "space MZ"] *   simp del: divide_pextreal_def
hoelzl@40859
   793
             intro!: setsum_cong)
hoelzl@39097
   794
qed
hoelzl@39097
   795
hoelzl@40859
   796
(* Sum formula for \<H>(X | Z) on simple functions: the instance of
   conditional_entropy_generic_eq with index sets X ` space M and
   Z ` space M. *)
lemma (in information_space) conditional_entropy_eq:
hoelzl@41689
   797
  assumes "simple_function M X" "simple_function M Z"
hoelzl@40859
   798
  shows "\<H>(X | Z) =
hoelzl@36080
   799
     - (\<Sum>(x, z)\<in>X ` space M \<times> Z ` space M.
hoelzl@38656
   800
         real (joint_distribution X Z {(x, z)}) *
hoelzl@38656
   801
         log b (real (joint_distribution X Z {(x, z)}) / real (distribution Z {z})))"
hoelzl@40859
   802
  using conditional_entropy_generic_eq[OF assms[THEN simple_function_imp_finite_random_variable]]
hoelzl@40859
   803
  by simp
hoelzl@39097
   804
hoelzl@40859
   805
(* \<H>(X | Y) written as a weighted sum over y of entropy-like inner sums:
   each y in Y`space M contributes distribution Y {y} times the sum over x
   of q * log b q, where q = joint_distribution X Y {(x,y)} / distribution Y {y}. *)
lemma (in information_space) conditional_entropy_eq_ce_with_hypothesis:
hoelzl@41689
   806
  assumes X: "simple_function M X" and Y: "simple_function M Y"
hoelzl@40859
   807
  shows "\<H>(X | Y) =
hoelzl@39097
   808
    -(\<Sum>y\<in>Y`space M. real (distribution Y {y}) *
hoelzl@39097
   809
      (\<Sum>x\<in>X`space M. real (joint_distribution X Y {(x,y)}) / real (distribution Y {(y)}) *
hoelzl@39097
   810
              log b (real (joint_distribution X Y {(x,y)}) / real (distribution Y {(y)}))))"
hoelzl@40859
   811
  unfolding conditional_entropy_eq[OF assms]
hoelzl@40859
   812
  (* Finiteness and ordering facts for the distributions are supplied to auto. *)
  using finite_distribution_finite[OF finite_random_variable_pairI[OF assms[THEN simple_function_imp_finite_random_variable]]]
hoelzl@40859
   813
  using finite_distribution_order(5,6)[OF assms[THEN simple_function_imp_finite_random_variable]]
hoelzl@40859
   814
  using finite_distribution_finite[OF Y[THEN simple_function_imp_finite_random_variable]]
hoelzl@41023
   815
  by (auto simp: setsum_cartesian_product'  setsum_commute[of _ "Y`space M"] setsum_right_distrib real_of_pextreal_eq_0
hoelzl@40859
   816
           intro!: setsum_cong)
hoelzl@39097
   817
hoelzl@40859
   818
(* \<H>(X | Y) as a nested double sum (x outer, y inner) instead of a single
   sum over the product set; a restatement of conditional_entropy_eq. *)
lemma (in information_space) conditional_entropy_eq_cartesian_product:
hoelzl@41689
   819
  assumes "simple_function M X" "simple_function M Y"
hoelzl@40859
   820
  shows "\<H>(X | Y) = -(\<Sum>x\<in>X`space M. \<Sum>y\<in>Y`space M.
hoelzl@39097
   821
    real (joint_distribution X Y {(x,y)}) *
hoelzl@39097
   822
    log b (real (joint_distribution X Y {(x,y)}) / real (distribution Y {y})))"
hoelzl@40859
   823
  unfolding conditional_entropy_eq[OF assms]
hoelzl@40859
   824
  by (auto intro!: setsum_cong simp: setsum_cartesian_product')
hoelzl@36080
   825
hoelzl@39097
   826
subsection {* Equalities *}
hoelzl@39097
   827
hoelzl@40859
   828
(* Mutual information as a difference of entropies:
   \<I>(X ; Z) = \<H>(X) - \<H>(X | Z) for simple functions X and Z.
   The key step splits each summand's logarithm; the sum formulas for the
   three quantities are then unfolded and combined. *)
lemma (in information_space) mutual_information_eq_entropy_conditional_entropy:
hoelzl@41689
   829
  assumes X: "simple_function M X" and Z: "simple_function M Z"
hoelzl@40859
   830
  shows  "\<I>(X ; Z) = \<H>(X) - \<H>(X | Z)"
hoelzl@40859
   831
proof -
hoelzl@40859
   832
  let "?XZ x z" = "real (joint_distribution X Z {(x, z)})"
hoelzl@40859
   833
  let "?Z z" = "real (distribution Z {z})"
hoelzl@40859
   834
  let "?X x" = "real (distribution X {x})"
hoelzl@40859
   835
  note fX = X[THEN simple_function_imp_finite_random_variable]
hoelzl@40859
   836
  note fZ = Z[THEN simple_function_imp_finite_random_variable]
hoelzl@40859
   837
  (* Finiteness and ordering facts are added as local simp rules. *)
  note fX[THEN finite_distribution_finite, simp] and fZ[THEN finite_distribution_finite, simp]
hoelzl@40859
   838
  note finite_distribution_order[OF fX fZ, simp]
hoelzl@40859
   839
  (* Per-summand identity: log of p(x,z) / (p(x) * p(z)) split into
     log of p(x,z) / p(z) minus log of p(x). *)
  { fix x z assume "x \<in> X`space M" "z \<in> Z`space M"
hoelzl@40859
   840
    have "?XZ x z * log b (?XZ x z / (?X x * ?Z z)) =
hoelzl@40859
   841
          ?XZ x z * log b (?XZ x z / ?Z z) - ?XZ x z * log b (?X x)"
hoelzl@41023
   842
      by (auto simp: log_simps real_of_pextreal_mult[symmetric] zero_less_mult_iff
hoelzl@41023
   843
                     zero_less_real_of_pextreal field_simps real_of_pextreal_eq_0 abs_mult) }
hoelzl@40859
   844
  note * = this
hoelzl@40859
   845
  show ?thesis
hoelzl@40859
   846
    unfolding entropy_eq[OF X] conditional_entropy_eq[OF X Z] mutual_information_eq[OF X Z]
hoelzl@40859
   847
    using setsum_real_joint_distribution_singleton[OF fZ fX, unfolded joint_distribution_commute_singleton[of Z X]]
hoelzl@40859
   848
    by (simp add: * setsum_cartesian_product' setsum_subtractf setsum_left_distrib[symmetric]
hoelzl@40859
   849
                     setsum_real_distribution)
hoelzl@40859
   850
qed
hoelzl@36080
   851
hoelzl@40859
   852
(* Conditioning does not increase entropy: \<H>(X | Z) \<le> \<H>(X).
   Follows from \<I>(X ; Z) = \<H>(X) - \<H>(X | Z) together with
   non-negativity of mutual information. *)
lemma (in information_space) conditional_entropy_less_eq_entropy:
hoelzl@41689
   853
  assumes X: "simple_function M X" and Z: "simple_function M Z"
hoelzl@40859
   854
  shows "\<H>(X | Z) \<le> \<H>(X)"
hoelzl@36624
   855
proof -
hoelzl@40859
   856
  have "\<I>(X ; Z) = \<H>(X) - \<H>(X | Z)" using mutual_information_eq_entropy_conditional_entropy[OF assms] .
hoelzl@40859
   857
  with mutual_information_positive[OF X Z] entropy_positive[OF X]
hoelzl@36624
   858
  show ?thesis by auto
hoelzl@36080
   859
qed
hoelzl@36080
   860
hoelzl@40859
   861
(* Chain rule for entropy: the joint entropy of (X, Y) equals
   \<H>(X) + \<H>(Y|X) for simple functions X and Y.
   The key step splits log b (p(x,y) / p(x)) per summand; the sum formulas
   are then unfolded and the sums reordered to match. *)
lemma (in information_space) entropy_chain_rule:
hoelzl@41689
   862
  assumes X: "simple_function M X" and Y: "simple_function M Y"
hoelzl@40859
   863
  shows  "\<H>(\<lambda>x. (X x, Y x)) = \<H>(X) + \<H>(Y|X)"
hoelzl@40859
   864
proof -
hoelzl@40859
   865
  let "?XY x y" = "real (joint_distribution X Y {(x, y)})"
hoelzl@40859
   866
  let "?Y y" = "real (distribution Y {y})"
hoelzl@40859
   867
  let "?X x" = "real (distribution X {x})"
hoelzl@40859
   868
  note fX = X[THEN simple_function_imp_finite_random_variable]
hoelzl@40859
   869
  note fY = Y[THEN simple_function_imp_finite_random_variable]
hoelzl@40859
   870
  (* Finiteness and ordering facts are added as local simp rules. *)
  note fX[THEN finite_distribution_finite, simp] and fY[THEN finite_distribution_finite, simp]
hoelzl@40859
   871
  note finite_distribution_order[OF fX fY, simp]
hoelzl@40859
   872
  (* Per-summand identity: log of a quotient as a difference of logs. *)
  { fix x y assume "x \<in> X`space M" "y \<in> Y`space M"
hoelzl@40859
   873
    have "?XY x y * log b (?XY x y / ?X x) =
hoelzl@40859
   874
          ?XY x y * log b (?XY x y) - ?XY x y * log b (?X x)"
hoelzl@41023
   875
      by (auto simp: log_simps real_of_pextreal_mult[symmetric] zero_less_mult_iff
hoelzl@41023
   876
                     zero_less_real_of_pextreal field_simps real_of_pextreal_eq_0 abs_mult) }
hoelzl@40859
   877
  note * = this
hoelzl@40859
   878
  show ?thesis
hoelzl@40859
   879
    using setsum_real_joint_distribution_singleton[OF fY fX]
hoelzl@40859
   880
    unfolding entropy_eq[OF X] conditional_entropy_eq_cartesian_product[OF Y X] entropy_eq_cartesian_product[OF X Y]
hoelzl@40859
   881
    unfolding joint_distribution_commute_singleton[of Y X] setsum_commute[of _ "X`space M"]
hoelzl@40859
   882
    by (simp add: * setsum_subtractf setsum_left_distrib[symmetric])
hoelzl@40859
   883
qed
hoelzl@38656
   884
hoelzl@39097
   885
section {* Partitioning *}
hoelzl@36080
   886
hoelzl@36624
   887
(* subvimage A f g: for every x in A, the f-fibre of x within A is
   contained in the g-fibre of x within A; i.e. on A, points that f
   identifies are also identified by g. *)
definition "subvimage A f g \<longleftrightarrow> (\<forall>x \<in> A. f -` {f x} \<inter> A \<subseteq> g -` {g x} \<inter> A)"
hoelzl@36624
   888
hoelzl@36624
   889
(* Introduction rule for subvimage: it suffices that f x = f y implies
   g x = g y for all x, y in A. *)
lemma subvimageI:
hoelzl@36624
   890
  assumes "\<And>x y. \<lbrakk> x \<in> A ; y \<in> A ; f x = f y \<rbrakk> \<Longrightarrow> g x = g y"
hoelzl@36624
   891
  shows "subvimage A f g"
hoelzl@36624
   892
  using assms unfolding subvimage_def by blast
hoelzl@36624
   893
hoelzl@36624
   894
(* Elimination rule for subvimage: from subvimage A f g obtain that
   f x = f y implies g x = g y for all x, y in A.  Declared with
   [consumes 1]. *)
lemma subvimageE[consumes 1]:
hoelzl@36624
   895
  assumes "subvimage A f g"
hoelzl@36624
   896
  obtains "\<And>x y. \<lbrakk> x \<in> A ; y \<in> A ; f x = f y \<rbrakk> \<Longrightarrow> g x = g y"
hoelzl@36624
   897
  using assms unfolding subvimage_def by blast
hoelzl@36624
   898
hoelzl@36624
   899
(* Destruction rule for subvimage: if subvimage A f g holds and x, y in A
   satisfy f x = f y, then g x = g y.
   The statement carries its premises as meta-implications (there is no
   assumes clause), so the proof does not refer to assms. *)
lemma subvimageD:
hoelzl@36624
   900
  "\<lbrakk> subvimage A f g ; x \<in> A ; y \<in> A ; f x = f y \<rbrakk> \<Longrightarrow> g x = g y"
hoelzl@36624
   901
  unfolding subvimage_def by blast
hoelzl@36624
   902
hoelzl@36624
   903
(* subvimage is inherited by subsets of the carrier set. *)
lemma subvimage_subset:
hoelzl@36624
   904
  "\<lbrakk> subvimage B f g ; A \<subseteq> B \<rbrakk> \<Longrightarrow> subvimage A f g"
hoelzl@36624
   905
  unfolding subvimage_def by auto
hoelzl@36624
   906
hoelzl@36624
   907
(* Reflexivity: every function is in the subvimage relation with itself on
   any set A. Declared as an intro rule. *)
lemma subvimage_idem[intro]: "subvimage A g g"
hoelzl@36624
   908
  by (safe intro!: subvimageI)
hoelzl@36624
   909
hoelzl@36624
   910
(* Post-composition on the right-hand function: if subvimage A g h holds,
   then subvimage A g (f o h) holds for any f. Declared as an intro rule. *)
lemma subvimage_comp_finer[intro]:
hoelzl@36624
   911
  assumes svi: "subvimage A g h"
hoelzl@36624
   912
  shows "subvimage A g (f \<circ> h)"
hoelzl@36624
   913
(* Reduce to the pointwise formulation via subvimageI; the goal shown below
   is stated with the composition already applied to its argument. *)
proof (rule subvimageI, simp)
hoelzl@36624
   914
  fix x y assume "x \<in> A" "y \<in> A" "g x = g y"
hoelzl@36624
   915
(* subvimageD applied to svi and the three assumptions yields h x = h y. *)
  from svi[THEN subvimageD, OF this]
hoelzl@36624
   916
  show "f (h x) = f (h y)" by simp
hoelzl@36624
   917
qed
hoelzl@36624
   918
hoelzl@36624
   919
(* Post-composition on the left-hand function: if subvimage A g h holds and
   f is injective on the image of A under g, then subvimage A (f o g) h
   holds. Injectivity (via inj_on_iff) turns equality of f (g x) and
   f (g y) back into equality of g x and g y. *)
lemma subvimage_comp_gran:
hoelzl@36624
   920
  assumes svi: "subvimage A g h"
hoelzl@36624
   921
  assumes inj: "inj_on f (g ` A)"
hoelzl@36624
   922
  shows "subvimage A (f \<circ> g) h"
hoelzl@36624
   923
  by (rule subvimageI) (auto intro!: subvimageD[OF svi] simp: inj_on_iff[OF inj])
hoelzl@36624
   924
hoelzl@36624
   925
(* Pre-composition: if subvimage holds for g and h on the image of A under
   f, then it holds for (g o f) and (h o f) on A. *)
lemma subvimage_comp:
hoelzl@36624
   926
  assumes svi: "subvimage (f ` A) g h"
hoelzl@36624
   927
  shows "subvimage A (g \<circ> f) (h \<circ> f)"
hoelzl@36624
   928
  by (rule subvimageI) (auto intro!: svi[THEN subvimageD])
hoelzl@36624
   929
hoelzl@36624
   930
(* Transitivity: subvimage A f g and subvimage A g h together give
   subvimage A f h. *)
lemma subvimage_trans:
hoelzl@36624
   931
  assumes fg: "subvimage A f g"
hoelzl@36624
   932
  assumes gh: "subvimage A g h"
hoelzl@36624
   933
  shows "subvimage A f h"
hoelzl@36624
   934
  by (rule subvimageI) (auto intro!: fg[THEN subvimageD] gh[THEN subvimageD])
hoelzl@36624
   935
hoelzl@36624
   936
(* Factorisation: if subvimage A f g holds, there is a function h with
   h (f x) = g x for every x in A. *)
lemma subvimage_translator:
hoelzl@36624
   937
  assumes svi: "subvimage A f g"
hoelzl@36624
   938
  shows "\<exists>h. \<forall>x \<in> A. h (f x)  = g x"
hoelzl@36624
   939
(* Witness: map a value x to the element (chosen with THE) of the g-image of
   the part of the f-preimage of {x} that lies in A. *)
proof (safe intro!: exI[of _ "\<lambda>x. (THE z. z \<in> (g ` (f -` {x} \<inter> A)))"])
hoelzl@36624
   940
  fix x assume "x \<in> A"
hoelzl@36624
   941
(* For x in A the definite description evaluates to g x; theI2 is
   instantiated with g x as the witness. *)
  show "(THE x'. x' \<in> (g ` (f -` {f x} \<inter> A))) = g x"
hoelzl@36624
   942
    by (rule theI2[of _ "g x"])
hoelzl@36624
   943
      (insert `x \<in> A`, auto intro!: svi[THEN subvimageD])
hoelzl@36624
   944
qed
hoelzl@36624
   945
hoelzl@36624
   946
(* Image form of subvimage_translator: if subvimage A f g holds, there is a
   function h that maps the image of A under f onto the image of A under g. *)
lemma subvimage_translator_image:
hoelzl@36624
   947
  assumes svi: "subvimage A f g"
hoelzl@36624
   948
  shows "\<exists>h. h ` f ` A = g ` A"
hoelzl@36624
   949
proof -
hoelzl@36624
   950
(* Take the pointwise factorisation h from subvimage_translator and use the
   same h as the witness for the image equation. *)
  from subvimage_translator[OF svi]
hoelzl@36624
   951
  obtain h where "\<And>x. x \<in> A \<Longrightarrow> h (f x) = g x" by auto
hoelzl@36624
   952
  thus ?thesis
hoelzl@36624
   953
    by (auto intro!: exI[of _ h]
hoelzl@36624
   954
      simp: image_compose[symmetric] comp_def cong: image_cong)
hoelzl@36624
   955
qed
hoelzl@36624
   956
hoelzl@36624
   957
(* Finiteness transfer: if subvimage A f g holds and the image of A under f
   is finite, then the image of A under g is finite as well. *)
lemma subvimage_finite:
hoelzl@36624
   958
  assumes svi: "subvimage A f g" and fin: "finite (f`A)"
hoelzl@36624
   959
  shows "finite (g`A)"
hoelzl@36624
   960
proof -
hoelzl@36624
   961
(* Write the g-image as the h-image of the f-image, with h taken from
   subvimage_translator_image; finiteness then follows from fin. *)
  from subvimage_translator_image[OF svi]
hoelzl@36624
   962
  obtain h where "g`A = h`f`A" by fastsimp
hoelzl@36624
   963
  with fin show "finite (g`A)" by simp
hoelzl@36624
   964
qed
hoelzl@36624
   965
hoelzl@36624
   966
(* Dichotomy for preimages under subvimage A f g: for any values x and y,
   either the part of the f-preimage of {x} in A is contained in the part of
   the g-preimage of {y} in A (abbreviated ?sub), or the two preimages have
   no common point in A (abbreviated ?dist). *)
lemma subvimage_disj:
hoelzl@36624
   967
  assumes svi: "subvimage A f g"
hoelzl@36624
   968
  shows "f -` {x} \<inter> A \<subseteq> g -` {y} \<inter> A \<or>
hoelzl@36624
   969
      f -` {x} \<inter> g -` {y} \<inter> A = {}" (is "?sub \<or> ?dist")
hoelzl@36624
   970
(* disjCI: prove the first disjunct under the assumption that the second
   one fails. *)
proof (rule disjCI)
hoelzl@36624
   971
  assume "\<not> ?dist"
hoelzl@36624
   972
(* A non-empty intersection provides a point z of A with f z = x, g z = y. *)
  then obtain z where "z \<in> A" and "x = f z" and "y = g z" by auto
hoelzl@36624
   973
  thus "?sub" using svi unfolding subvimage_def by auto
hoelzl@36624
   974
qed
hoelzl@36624
   975
hoelzl@36624
   976
(* Splitting a sum along a coarser function: if subvimage A f g holds and
   the image of A under f is finite, then the sum of h over the f-image of A
   equals the double sum that first ranges over the values y in the g-image
   of A and then over the f-image of the part of the g-preimage of {y} in A. *)
lemma setsum_image_split:
hoelzl@36624
   977
  assumes svi: "subvimage A f g" and fin: "finite (f ` A)"
hoelzl@36624
   978
  shows "(\<Sum>x\<in>f`A. h x) = (\<Sum>y\<in>g`A. \<Sum>x\<in>f`(g -` {y} \<inter> A). h x)"
hoelzl@36624
   979
    (is "?lhs = ?rhs")
hoelzl@36624
   980
proof -
hoelzl@36624
   981
(* Step 1: the f-image of A is the second projection of the dependent sum
   ?SIGMA, indexed by the g-image of A. *)
  have "f ` A =
hoelzl@36624
   982
      snd ` (SIGMA x : g ` A. f ` (g -` {x} \<inter> A))"
hoelzl@36624
   983
      (is "_ = snd ` ?SIGMA")
hoelzl@36624
   984
    unfolding image_split_eq_Sigma[symmetric]
hoelzl@36624
   985
    by (simp add: image_compose[symmetric] comp_def)
hoelzl@36624
   986
  moreover
hoelzl@36624
   987
(* Step 2: snd is injective on ?SIGMA; this is where svi is used. *)
  have snd_inj: "inj_on snd ?SIGMA"
hoelzl@36624
   988
    unfolding image_split_eq_Sigma[symmetric]
hoelzl@36624
   989
    by (auto intro!: inj_onI subvimageD[OF svi])
hoelzl@36624
   990
  ultimately
hoelzl@36624
   991
(* Step 3: reindex the sum over the f-image as a sum over the pairs in
   ?SIGMA (setsum_reindex). *)
  have "(\<Sum>x\<in>f`A. h x) = (\<Sum>(x,y)\<in>?SIGMA. h y)"
hoelzl@36624
   992
    by (auto simp: setsum_reindex intro: setsum_cong)
hoelzl@36624
   993
(* Step 4: turn the sum over ?SIGMA into the nested sum (setsum_Sigma); the
   finiteness side conditions come from fin and subvimage_finite. *)
  also have "... = ?rhs"
hoelzl@36624
   994
    using subvimage_finite[OF svi fin] fin
hoelzl@36624
   995
    apply (subst setsum_Sigma[symmetric])
hoelzl@36624
   996
    by (auto intro!: finite_subset[of _ "f`A"])
hoelzl@36624
   997
  finally show ?thesis .
hoelzl@36624
   998
qed
hoelzl@36624
   999
hoelzl@40859
  1000
(* Entropy decomposition along a partition: for simple functions X and P on
   M with subvimage (space M) X P, the entropy of X equals the entropy of P
   plus the conditional entropy of X given P. *)
lemma (in information_space) entropy_partition:
hoelzl@41689
  1001
  assumes sf: "simple_function M X" "simple_function M P"
hoelzl@36624
  1002
  assumes svi: "subvimage (space M) X P"
hoelzl@36624
  1003
  shows "\<H>(X) = \<H>(P) + \<H>(X|P)"
hoelzl@36624
  1004
proof -
hoelzl@40859
  1005
(* NOTE(review): the three abbreviations below do not appear to be
   referenced in the remainder of this proof - confirm before removing. *)
  let "?XP x p" = "real (joint_distribution X P {(x, p)})"
hoelzl@40859
  1006
  let "?X x" = "real (distribution X {x})"
hoelzl@40859
  1007
  let "?P p" = "real (distribution P {p})"
hoelzl@40859
  1008
(* Derive facts fX and fP from the simple-function assumptions and add the
   resulting finiteness / ordering facts to the simplifier for this proof. *)
  note fX = sf(1)[THEN simple_function_imp_finite_random_variable]
hoelzl@40859
  1009
  note fP = sf(2)[THEN simple_function_imp_finite_random_variable]
hoelzl@40859
  1010
  note fX[THEN finite_distribution_finite, simp] and fP[THEN finite_distribution_finite, simp]
hoelzl@40859
  1011
  note finite_distribution_order[OF fX fP, simp]
hoelzl@38656
  1012
(* Key step: the sum of p * log b p over the singleton distribution values
   of X equals the corresponding double sum over the joint distribution of
   X and P. setsum_image_split (using svi) splits the left-hand sum. *)
  have "(\<Sum>x\<in>X ` space M. real (distribution X {x}) * log b (real (distribution X {x}))) =
hoelzl@36624
  1013
    (\<Sum>y\<in>P `space M. \<Sum>x\<in>X ` space M.
hoelzl@38656
  1014
    real (joint_distribution X P {(x, y)}) * log b (real (joint_distribution X P {(x, y)})))"
hoelzl@36624
  1015
  proof (subst setsum_image_split[OF svi],
hoelzl@40859
  1016
      safe intro!: setsum_mono_zero_cong_left imageI)
hoelzl@40859
  1017
    show "finite (X ` space M)" "finite (X ` space M)" "finite (P ` space M)"
hoelzl@40859
  1018
      using sf unfolding simple_function_def by auto
hoelzl@40859
  1019
  next
hoelzl@36624
  1020
(* Case: a non-zero joint term at (X x, P p) forces x into the P-preimage
   of {P p}. *)
    fix p x assume in_space: "p \<in> space M" "x \<in> space M"
hoelzl@38656
  1021
    assume "real (joint_distribution X P {(X x, P p)}) * log b (real (joint_distribution X P {(X x, P p)})) \<noteq> 0"
hoelzl@36624
  1022
    hence "(\<lambda>x. (X x, P x)) -` {(X x, P p)} \<inter> space M \<noteq> {}" by (auto simp: distribution_def)
hoelzl@36624
  1023
    with svi[unfolded subvimage_def, rule_format, OF `x \<in> space M`]
hoelzl@36624
  1024
    show "x \<in> P -` {P p}" by auto
hoelzl@36624
  1025
  next
hoelzl@36624
  1026
(* Case: when P x = P p, the preimage of the pair (X x, P p) within space M
   coincides with the preimage of X x, so the two summands agree. *)
    fix p x assume in_space: "p \<in> space M" "x \<in> space M"
hoelzl@36624
  1027
    assume "P x = P p"
hoelzl@36624
  1028
    from this[symmetric] svi[unfolded subvimage_def, rule_format, OF `x \<in> space M`]
hoelzl@36624
  1029
    have "X -` {X x} \<inter> space M \<subseteq> P -` {P p} \<inter> space M"
hoelzl@36624
  1030
      by auto
hoelzl@36624
  1031
    hence "(\<lambda>x. (X x, P x)) -` {(X x, P p)} \<inter> space M = X -` {X x} \<inter> space M"
hoelzl@36624
  1032
      by auto
hoelzl@38656
  1033
    thus "real (distribution X {X x}) * log b (real (distribution X {X x})) =
hoelzl@38656
  1034
          real (joint_distribution X P {(X x, P p)}) *
hoelzl@38656
  1035
          log b (real (joint_distribution X P {(X x, P p)}))"
hoelzl@36624
  1036
      by (auto simp: distribution_def)
hoelzl@36624
  1037
  qed
hoelzl@40859
  1038
(* Algebraic step: split the logarithm of the quotient joint / marginal
   into a difference of two logarithm terms. *)
  moreover have "\<And>x y. real (joint_distribution X P {(x, y)}) *
hoelzl@40859
  1039
      log b (real (joint_distribution X P {(x, y)}) / real (distribution P {y})) =
hoelzl@40859
  1040
      real (joint_distribution X P {(x, y)}) * log b (real (joint_distribution X P {(x, y)})) -
hoelzl@40859
  1041
      real (joint_distribution X P {(x, y)}) * log b (real (distribution P {y}))"
hoelzl@40859
  1042
    by (auto simp add: log_simps zero_less_mult_iff field_simps)
hoelzl@40859
  1043
(* Combine both facts after rewriting the entropies with entropy_eq and
   conditional_entropy_eq. *)
  ultimately show ?thesis
hoelzl@40859
  1044
    unfolding sf[THEN entropy_eq] conditional_entropy_eq[OF sf]
hoelzl@40859
  1045
    using setsum_real_joint_distribution_singleton[OF fX fP]
hoelzl@38656
  1046
    by (simp add: setsum_cartesian_product' setsum_subtractf setsum_real_distribution
hoelzl@36624
  1047
      setsum_left_distrib[symmetric] setsum_commute[where B="P`space M"])
hoelzl@36624
  1048
qed
hoelzl@36624
  1049
hoelzl@40859
  1050
(* Data processing: for a simple function X, the entropy of f o X is at most
   the entropy of X. *)
corollary (in information_space) entropy_data_processing:
hoelzl@41689
  1051
  assumes X: "simple_function M X" shows "\<H>(f \<circ> X) \<le> \<H>(X)"
hoelzl@40859
  1052
proof -
hoelzl@40859
  1053
(* Collect the three premises of entropy_partition, with f o X in the role
   of the partitioning function. *)
  note X
hoelzl@41689
  1054
  moreover have fX: "simple_function M (f \<circ> X)" using X by auto
hoelzl@40859
  1055
  moreover have "subvimage (space M) X (f \<circ> X)" by auto
hoelzl@40859
  1056
  ultimately have "\<H>(X) = \<H>(f\<circ>X) + \<H>(X|f\<circ>X)" by (rule entropy_partition)
hoelzl@40859
  1057
(* The inequality follows from the decomposition together with
   conditional_entropy_positive. *)
  then show "\<H>(f \<circ> X) \<le> \<H>(X)"
hoelzl@40859
  1058
    by (auto intro: conditional_entropy_positive[OF X fX])
hoelzl@40859
  1059
qed
hoelzl@36624
  1060
hoelzl@40859
  1061
(* Invariance under injective maps: for a simple function X and a function f
   that is injective on the image of space M under X, the entropy of f o X
   equals the entropy of X. *)
corollary (in information_space) entropy_of_inj:
hoelzl@41689
  1062
  assumes X: "simple_function M X" and inj: "inj_on f (X`space M)"
hoelzl@36624
  1063
  shows "\<H>(f \<circ> X) = \<H>(X)"
hoelzl@36624
  1064
(* Prove the two inequalities separately (antisym). *)
proof (rule antisym)
hoelzl@40859
  1065
  show "\<H>(f \<circ> X) \<le> \<H>(X)" using entropy_data_processing[OF X] .
hoelzl@36624
  1066
next
hoelzl@41689
  1067
  have sf: "simple_function M (f \<circ> X)"
hoelzl@40859
  1068
    using X by auto
hoelzl@36624
  1069
(* Rewrite X as the inverse of f (on the image of X) composed with f o X,
   then apply entropy_data_processing to f o X. *)
  have "\<H>(X) = \<H>(the_inv_into (X`space M) f \<circ> (f \<circ> X))"
hoelzl@40859
  1070
    by (auto intro!: mutual_information_cong simp: entropy_def the_inv_into_f_f[OF inj])
hoelzl@36624
  1071
  also have "... \<le> \<H>(f \<circ> X)"
hoelzl@40859
  1072
    using entropy_data_processing[OF sf] .
hoelzl@36624
  1073
  finally show "\<H>(X) \<le> \<H>(f \<circ> X)" .
hoelzl@36624
  1074
qed
hoelzl@36624
  1075
hoelzl@36080
  1076
end