author hoelzl Mon May 03 14:35:10 2010 +0200 (2010-05-03) changeset 36624 25153c08655e parent 36623 d26348b667f2 child 36632 f96aa31b739d child 36641 83d4e01ebda5
Cleanup information theory
```     1.1 --- a/src/HOL/Probability/Information.thy	Mon May 03 14:35:10 2010 +0200
1.2 +++ b/src/HOL/Probability/Information.thy	Mon May 03 14:35:10 2010 +0200
1.3 @@ -2,168 +2,263 @@
1.4  imports Probability_Space Product_Measure "../Multivariate_Analysis/Convex"
1.5  begin
1.6
1.7 -lemma pos_neg_part_abs:
1.8 -  fixes f :: "'a \<Rightarrow> real"
1.9 -  shows "pos_part f x + neg_part f x = \<bar>f x\<bar>"
1.10 -unfolding real_abs_def pos_part_def neg_part_def by auto
1.11 +section "Convex theory"
1.12
1.13 -lemma pos_part_abs:
1.14 -  fixes f :: "'a \<Rightarrow> real"
1.15 -  shows "pos_part (\<lambda> x. \<bar>f x\<bar>) y = \<bar>f y\<bar>"
1.16 -unfolding pos_part_def real_abs_def by auto
1.17 -
1.18 -lemma neg_part_abs:
1.19 -  fixes f :: "'a \<Rightarrow> real"
1.20 -  shows "neg_part (\<lambda> x. \<bar>f x\<bar>) y = 0"
1.21 -unfolding neg_part_def real_abs_def by auto
1.22 +lemma log_setsum:
1.23 +  assumes "finite s" "s \<noteq> {}"
1.24 +  assumes "b > 1"
1.25 +  assumes "(\<Sum> i \<in> s. a i) = 1"
1.26 +  assumes "\<And> i. i \<in> s \<Longrightarrow> a i \<ge> 0"
1.27 +  assumes "\<And> i. i \<in> s \<Longrightarrow> y i \<in> {0 <..}"
1.28 +  shows "log b (\<Sum> i \<in> s. a i * y i) \<ge> (\<Sum> i \<in> s. a i * log b (y i))"
1.29 +proof -
1.30 +  have "convex_on {0 <..} (\<lambda> x. - log b x)"
1.31 +    by (rule minus_log_convex[OF `b > 1`])
1.32 +  hence "- log b (\<Sum> i \<in> s. a i * y i) \<le> (\<Sum> i \<in> s. a i * - log b (y i))"
1.33 +    using convex_on_setsum[of _ _ "\<lambda> x. - log b x"] assms pos_is_convex by fastsimp
1.34 +  thus ?thesis by (auto simp add:setsum_negf le_imp_neg_le)
1.35 +qed
1.36
1.37 -lemma (in measure_space) int_abs:
1.38 -  assumes "integrable f"
1.39 -  shows "integrable (\<lambda> x. \<bar>f x\<bar>)"
1.40 -using assms
1.41 +lemma log_setsum':
1.42 +  assumes "finite s" "s \<noteq> {}"
1.43 +  assumes "b > 1"
1.44 +  assumes "(\<Sum> i \<in> s. a i) = 1"
1.45 +  assumes pos: "\<And> i. i \<in> s \<Longrightarrow> 0 \<le> a i"
1.46 +          "\<And> i. \<lbrakk> i \<in> s ; 0 < a i \<rbrakk> \<Longrightarrow> 0 < y i"
1.47 +  shows "log b (\<Sum> i \<in> s. a i * y i) \<ge> (\<Sum> i \<in> s. a i * log b (y i))"
1.48  proof -
1.49 -  from assms obtain p q where pq: "p \<in> nnfis (pos_part f)" "q \<in> nnfis (neg_part f)"
1.50 -    unfolding integrable_def by auto
1.51 -  hence "p + q \<in> nnfis (\<lambda> x. pos_part f x + neg_part f x)"
1.52 -    using nnfis_add by auto
1.53 -  hence "p + q \<in> nnfis (\<lambda> x. \<bar>f x\<bar>)" using pos_neg_part_abs[of f] by simp
1.54 -  thus ?thesis unfolding integrable_def
1.55 -    using ext[OF pos_part_abs[of f], of "\<lambda> y. y"]
1.56 -      ext[OF neg_part_abs[of f], of "\<lambda> y. y"]
1.57 -    using nnfis_0 by auto
1.58 +  have "\<And>y. (\<Sum> i \<in> s - {i. a i = 0}. a i * y i) = (\<Sum> i \<in> s. a i * y i)"
1.59 +    using assms by (auto intro!: setsum_mono_zero_cong_left)
1.60 +  moreover have "log b (\<Sum> i \<in> s - {i. a i = 0}. a i * y i) \<ge> (\<Sum> i \<in> s - {i. a i = 0}. a i * log b (y i))"
1.61 +  proof (rule log_setsum)
1.62 +    have "setsum a (s - {i. a i = 0}) = setsum a s"
1.63 +      using assms(1) by (rule setsum_mono_zero_cong_left) auto
1.64 +    thus sum_1: "setsum a (s - {i. a i = 0}) = 1"
1.65 +      "finite (s - {i. a i = 0})" using assms by simp_all
1.66 +
1.67 +    show "s - {i. a i = 0} \<noteq> {}"
1.68 +    proof
1.69 +      assume *: "s - {i. a i = 0} = {}"
1.70 +      hence "setsum a (s - {i. a i = 0}) = 0" by (simp add: * setsum_empty)
1.71 +      with sum_1 show False by simp
1.72 +qed
1.73 +
1.74 +    fix i assume "i \<in> s - {i. a i = 0}"
1.75 +    hence "i \<in> s" "a i \<noteq> 0" by simp_all
1.76 +    thus "0 \<le> a i" "y i \<in> {0<..}" using pos[of i] by auto
1.77 +  qed fact+
1.78 +  ultimately show ?thesis by simp
1.79  qed
1.80
1.81 -lemma (in measure_space) measure_mono:
1.82 -  assumes "a \<subseteq> b" "a \<in> sets M" "b \<in> sets M"
1.83 -  shows "measure M a \<le> measure M b"
1.84 +section "Information theory"
1.85 +
1.86 +lemma (in finite_prob_space) sum_over_space_distrib:
1.87 +  "(\<Sum>x\<in>X`space M. distribution X {x}) = 1"
1.88 +  unfolding distribution_def prob_space[symmetric] using finite_space
1.90 +     (auto simp add: disjoint_family_on_def sets_eq_Pow intro!: arg_cong[where f=prob])
1.91 +
1.92 +locale finite_information_space = finite_prob_space +
1.93 +  fixes b :: real assumes b_gt_1: "1 < b"
1.94 +
1.95 +definition
1.96 +  "KL_divergence b M X Y =
1.97 +    measure_space.integral (M\<lparr>measure := X\<rparr>)
1.98 +                           (\<lambda>x. log b ((measure_space.RN_deriv (M \<lparr>measure := Y\<rparr> ) X) x))"
1.99 +
1.100 +lemma (in finite_prob_space) distribution_mono:
1.101 +  assumes "\<And>t. \<lbrakk> t \<in> space M ; X t \<in> x \<rbrakk> \<Longrightarrow> Y t \<in> y"
1.102 +  shows "distribution X x \<le> distribution Y y"
1.103 +  unfolding distribution_def
1.104 +  using assms by (auto simp: sets_eq_Pow intro!: measure_mono)
1.105 +
1.106 +lemma (in prob_space) distribution_remove_const:
1.107 +  shows "joint_distribution X (\<lambda>x. ()) {(x, ())} = distribution X {x}"
1.108 +  and "joint_distribution (\<lambda>x. ()) X {((), x)} = distribution X {x}"
1.109 +  and "joint_distribution X (\<lambda>x. (Y x, ())) {(x, y, ())} = joint_distribution X Y {(x, y)}"
1.110 +  and "joint_distribution X (\<lambda>x. ((), Y x)) {(x, (), y)} = joint_distribution X Y {(x, y)}"
1.111 +  and "distribution (\<lambda>x. ()) {()} = 1"
1.112 +  unfolding prob_space[symmetric]
1.113 +  by (auto intro!: arg_cong[where f=prob] simp: distribution_def)
1.114 +
1.115 +
1.116 +context finite_information_space
1.117 +begin
1.118 +
1.119 +lemma distribution_mono_gt_0:
1.120 +  assumes gt_0: "0 < distribution X x"
1.121 +  assumes *: "\<And>t. \<lbrakk> t \<in> space M ; X t \<in> x \<rbrakk> \<Longrightarrow> Y t \<in> y"
1.122 +  shows "0 < distribution Y y"
1.123 +  by (rule less_le_trans[OF gt_0 distribution_mono]) (rule *)
1.124 +
1.125 +lemma
1.126 +  assumes "0 \<le> A" and pos: "0 < A \<Longrightarrow> 0 < B" "0 < A \<Longrightarrow> 0 < C"
1.127 +  shows mult_log_mult: "A * log b (B * C) = A * log b B + A * log b C" (is "?mult")
1.128 +  and mult_log_divide: "A * log b (B / C) = A * log b B - A * log b C" (is "?div")
1.129  proof -
1.130 -  have "b = a \<union> (b - a)" using assms by auto
1.131 -  moreover have "{} = a \<inter> (b - a)" by auto
1.132 -  ultimately have "measure M b = measure M a + measure M (b - a)"
1.133 -    using measure_additive[of a "b - a"] local.Diff[of b a] assms by auto
1.134 -  moreover have "measure M (b - a) \<ge> 0" using positive assms by auto
1.135 -  ultimately show "measure M a \<le> measure M b" by auto
1.136 +  have "?mult \<and> ?div"
1.137 +proof (cases "A = 0")
1.138 +  case False
1.139 +  hence "0 < A" using `0 \<le> A` by auto
1.140 +    with pos[OF this] show "?mult \<and> ?div" using b_gt_1
1.141 +      by (auto simp: log_divide log_mult field_simps)
1.142 +qed simp
1.143 +  thus ?mult and ?div by auto
1.144  qed
1.145
1.146 -lemma (in measure_space) integral_0:
1.147 -  fixes f :: "'a \<Rightarrow> real"
1.148 -  assumes "integrable f" "integral f = 0" "nonneg f" and borel: "f \<in> borel_measurable M"
1.149 -  shows "measure M ({x. f x \<noteq> 0} \<inter> space M) = 0"
1.150 -proof -
1.151 -  have "{x. f x \<noteq> 0} = {x. \<bar>f x\<bar> > 0}" by auto
1.152 -  moreover
1.153 -  { fix y assume "y \<in> {x. \<bar> f x \<bar> > 0}"
1.154 -    hence "\<bar> f y \<bar> > 0" by auto
1.155 -    hence "\<exists> n. \<bar>f y\<bar> \<ge> inverse (real (Suc n))"
1.156 -      using ex_inverse_of_nat_Suc_less[of "\<bar>f y\<bar>"] less_imp_le unfolding real_of_nat_def by auto
1.157 -    hence "y \<in> (\<Union> n. {x. \<bar>f x\<bar> \<ge> inverse (real (Suc n))})"
1.158 -      by auto }
1.159 -  moreover
1.160 -  { fix y assume "y \<in> (\<Union> n. {x. \<bar>f x\<bar> \<ge> inverse (real (Suc n))})"
1.161 -    then obtain n where n: "y \<in> {x. \<bar>f x\<bar> \<ge> inverse (real (Suc n))}" by auto
1.162 -    hence "\<bar>f y\<bar> \<ge> inverse (real (Suc n))" by auto
1.163 -    hence "\<bar>f y\<bar> > 0"
1.164 -      using real_of_nat_Suc_gt_zero
1.165 -        positive_imp_inverse_positive[of "real_of_nat (Suc n)"] by fastsimp
1.166 -    hence "y \<in> {x. \<bar>f x\<bar> > 0}" by auto }
1.167 -  ultimately have fneq0_UN: "{x. f x \<noteq> 0} = (\<Union> n. {x. \<bar>f x\<bar> \<ge> inverse (real (Suc n))})"
1.168 -    by blast
1.169 -  { fix n
1.170 -    have int_one: "integrable (\<lambda> x. \<bar>f x\<bar> ^ 1)" using int_abs assms by auto
1.171 -    have "measure M (f -` {inverse (real (Suc n))..} \<inter> space M)
1.172 -           \<le> integral (\<lambda> x. \<bar>f x\<bar> ^ 1) / (inverse (real (Suc n)) ^ 1)"
1.173 -      using markov_ineq[OF `integrable f` _ int_one] real_of_nat_Suc_gt_zero by auto
1.174 -    hence le0: "measure M (f -` {inverse (real (Suc n))..} \<inter> space M) \<le> 0"
1.175 -      using assms unfolding nonneg_def by auto
1.176 -    have "{x. f x \<ge> inverse (real (Suc n))} \<inter> space M \<in> sets M"
1.177 -      apply (subst Int_commute) unfolding Int_def
1.178 -      using borel[unfolded borel_measurable_ge_iff] by simp
1.179 -    hence m0: "measure M ({x. f x \<ge> inverse (real (Suc n))} \<inter> space M) = 0 \<and>
1.180 -      {x. f x \<ge> inverse (real (Suc n))} \<inter> space M \<in> sets M"
1.181 -      using positive le0 unfolding atLeast_def by fastsimp }
1.182 -  moreover hence "range (\<lambda> n. {x. f x \<ge> inverse (real (Suc n))} \<inter> space M) \<subseteq> sets M"
1.183 -    by auto
1.184 -  moreover
1.185 -  { fix n
1.186 -    have "inverse (real (Suc n)) \<ge> inverse (real (Suc (Suc n)))"
1.187 -      using less_imp_inverse_less real_of_nat_Suc_gt_zero[of n] by fastsimp
1.188 -    hence "\<And> x. f x \<ge> inverse (real (Suc n)) \<Longrightarrow> f x \<ge> inverse (real (Suc (Suc n)))" by (rule order_trans)
1.189 -    hence "{x. f x \<ge> inverse (real (Suc n))} \<inter> space M
1.190 -         \<subseteq> {x. f x \<ge> inverse (real (Suc (Suc n)))} \<inter> space M" by auto }
1.191 -  ultimately have "(\<lambda> x. 0) ----> measure M (\<Union> n. {x. f x \<ge> inverse (real (Suc n))} \<inter> space M)"
1.192 -    using monotone_convergence[of "\<lambda> n. {x. f x \<ge> inverse (real (Suc n))} \<inter> space M"]
1.193 -    unfolding o_def by (simp del: of_nat_Suc)
1.194 -  hence "measure M (\<Union> n. {x. f x \<ge> inverse (real (Suc n))} \<inter> space M) = 0"
1.195 -    using LIMSEQ_const[of 0] LIMSEQ_unique by simp
1.196 -  hence "measure M ((\<Union> n. {x. \<bar>f x\<bar> \<ge> inverse (real (Suc n))}) \<inter> space M) = 0"
1.197 -    using assms unfolding nonneg_def by auto
1.198 -  thus "measure M ({x. f x \<noteq> 0} \<inter> space M) = 0" using fneq0_UN by simp
1.199 +lemma split_pairs:
1.200 +  shows
1.201 +    "((A, B) = X) \<longleftrightarrow> (fst X = A \<and> snd X = B)" and
1.202 +    "(X = (A, B)) \<longleftrightarrow> (fst X = A \<and> snd X = B)" by auto
1.203 +
1.204 +ML {*
1.205 +
1.206 +  (* tactic to solve equations of the form @{term "W * log b (X / (Y * Z)) = W * log b X - W * log b (Y * Z)"}
1.207 +     where @{term W} is a joint distribution of @{term X}, @{term Y}, and @{term Z}. *)
1.208 +
1.209 +  val mult_log_intros = [@{thm mult_log_divide}, @{thm mult_log_mult}]
1.210 +  val intros = [@{thm divide_pos_pos}, @{thm mult_pos_pos}, @{thm positive_distribution}]
1.211 +
1.212 +  val distribution_gt_0_tac = (rtac @{thm distribution_mono_gt_0}
1.213 +    THEN' assume_tac
1.214 +    THEN' clarsimp_tac (clasimpset_of @{context} addsimps2 @{thms split_pairs}))
1.215 +
1.216 +  val distr_mult_log_eq_tac = REPEAT_ALL_NEW (CHANGED o TRY o
1.217 +    (resolve_tac (mult_log_intros @ intros)
1.218 +      ORELSE' distribution_gt_0_tac
1.219 +      ORELSE' clarsimp_tac (clasimpset_of @{context})))
1.220 +
1.221 +  fun instanciate_term thy redex intro =
1.222 +    let
1.223 +      val intro_concl = Thm.concl_of intro
1.224 +
1.225 +      val lhs = intro_concl |> HOLogic.dest_Trueprop |> HOLogic.dest_eq |> fst
1.226 +
1.227 +      val m = SOME (Pattern.match thy (lhs, redex) (Vartab.empty, Vartab.empty))
1.228 +        handle Pattern.MATCH => NONE
1.229 +
1.230 +    in
1.231 +      Option.map (fn m => Envir.subst_term m intro_concl) m
1.232 +    end
1.233 +
1.234 +  fun mult_log_simproc simpset redex =
1.235 +  let
1.236 +    val ctxt = Simplifier.the_context simpset
1.237 +    val thy = ProofContext.theory_of ctxt
1.238 +    fun prove (SOME thm) = (SOME
1.239 +          (Goal.prove ctxt [] [] thm (K (distr_mult_log_eq_tac 1))
1.240 +           |> mk_meta_eq)
1.241 +            handle THM _ => NONE)
1.242 +      | prove NONE = NONE
1.243 +  in
1.244 +    get_first (instanciate_term thy (term_of redex) #> prove) mult_log_intros
1.245 +  end
1.246 +*}
1.247 +
1.248 +simproc_setup mult_log ("distribution X x * log b (A * B)" |
1.249 +                        "distribution X x * log b (A / B)") = {* K mult_log_simproc *}
1.250 +
1.251 +end
1.252 +
1.253 +lemma KL_divergence_eq_finite:
1.254 +  assumes u: "finite_measure_space (M\<lparr>measure := u\<rparr>)"
1.255 +  assumes v: "finite_measure_space (M\<lparr>measure := v\<rparr>)"
1.256 +  assumes u_0: "\<And>x. \<lbrakk> x \<in> space M ; v {x} = 0 \<rbrakk> \<Longrightarrow> u {x} = 0"
1.257 +  shows "KL_divergence b M u v = (\<Sum>x\<in>space M. u {x} * log b (u {x} / v {x}))" (is "_ = ?sum")
1.259 +  have ms_u: "measure_space (M\<lparr>measure := u\<rparr>)"
1.260 +    using u unfolding finite_measure_space_def by simp
1.261 +
1.262 +  show "(\<Sum>x \<in> space M. log b (measure_space.RN_deriv (M\<lparr>measure := v\<rparr>) u x) * u {x}) = ?sum"
1.263 +    apply (rule setsum_cong[OF refl])
1.264 +    apply simp
1.265 +    apply (safe intro!: arg_cong[where f="log b"] )
1.266 +    apply (subst finite_measure_space.RN_deriv_finite_singleton)
1.267 +    using assms ms_u by auto
1.268  qed
1.269
1.270 -definition
1.271 -  "KL_divergence b M u v =
1.272 -    measure_space.integral (M\<lparr>measure := u\<rparr>)
1.273 -                           (\<lambda>x. log b ((measure_space.RN_deriv (M \<lparr>measure := v\<rparr> ) u) x))"
1.274 -
1.275 -lemma (in finite_prob_space) finite_measure_space:
1.276 -  shows "finite_measure_space \<lparr> space = X ` space M, sets = Pow (X ` space M), measure = distribution X\<rparr>"
1.277 -    (is "finite_measure_space ?S")
1.279 -  show "finite (X ` space M)" using finite_space by simp
1.280 -
1.281 -  show "positive ?S (distribution X)" unfolding distribution_def
1.282 -    unfolding positive_def using positive'[unfolded positive_def] sets_eq_Pow by auto
1.283 +lemma log_setsum_divide:
1.284 +  assumes "finite S" and "S \<noteq> {}" and "1 < b"
1.285 +  assumes "(\<Sum>x\<in>S. g x) = 1"
1.286 +  assumes pos: "\<And>x. x \<in> S \<Longrightarrow> g x \<ge> 0" "\<And>x. x \<in> S \<Longrightarrow> f x \<ge> 0"
1.287 +  assumes g_pos: "\<And>x. \<lbrakk> x \<in> S ; 0 < g x \<rbrakk> \<Longrightarrow> 0 < f x"
1.288 +  shows "- (\<Sum>x\<in>S. g x * log b (g x / f x)) \<le> log b (\<Sum>x\<in>S. f x)"
1.289 +proof -
1.290 +  have log_mono: "\<And>x y. 0 < x \<Longrightarrow> x \<le> y \<Longrightarrow> log b x \<le> log b y"
1.291 +    using `1 < b` by (subst log_le_cancel_iff) auto
1.292
1.294 -  proof (simp, safe)
1.295 -    fix x y
1.296 -    have x: "(X -` x) \<inter> space M \<in> sets M"
1.297 -      and y: "(X -` y) \<inter> space M \<in> sets M" using sets_eq_Pow by auto
1.298 -    assume "x \<inter> y = {}"
1.300 -    have "prob (((X -` x) \<union> (X -` y)) \<inter> space M) =
1.301 -      prob ((X -` x) \<inter> space M) + prob ((X -` y) \<inter> space M)"
1.302 -      apply (subst Int_Un_distrib2)
1.303 -      by auto
1.304 -    thus "prob ((X -` x \<union> X -` y) \<inter> space M) = prob (X -` x \<inter> space M) + prob (X -` y \<inter> space M)"
1.305 -      by auto
1.306 +  have "- (\<Sum>x\<in>S. g x * log b (g x / f x)) = (\<Sum>x\<in>S. g x * log b (f x / g x))"
1.307 +  proof (unfold setsum_negf[symmetric], rule setsum_cong)
1.308 +    fix x assume x: "x \<in> S"
1.309 +    show "- (g x * log b (g x / f x)) = g x * log b (f x / g x)"
1.310 +    proof (cases "g x = 0")
1.311 +      case False
1.312 +      with pos[OF x] g_pos[OF x] have "0 < f x" "0 < g x" by simp_all
1.313 +      thus ?thesis using `1 < b` by (simp add: log_divide field_simps)
1.314 +    qed simp
1.315 +  qed rule
1.316 +  also have "... \<le> log b (\<Sum>x\<in>S. g x * (f x / g x))"
1.317 +  proof (rule log_setsum')
1.318 +    fix x assume x: "x \<in> S" "0 < g x"
1.319 +    with g_pos[OF x] show "0 < f x / g x" by (safe intro!: divide_pos_pos)
1.320 +  qed fact+
1.321 +  also have "... = log b (\<Sum>x\<in>S - {x. g x = 0}. f x)" using `finite S`
1.322 +    by (auto intro!: setsum_mono_zero_cong_right arg_cong[where f="log b"]
1.323 +        split: split_if_asm)
1.324 +  also have "... \<le> log b (\<Sum>x\<in>S. f x)"
1.325 +  proof (rule log_mono)
1.326 +    have "0 = (\<Sum>x\<in>S - {x. g x = 0}. 0)" by simp
1.327 +    also have "... < (\<Sum>x\<in>S - {x. g x = 0}. f x)" (is "_ < ?sum")
1.328 +    proof (rule setsum_strict_mono)
1.329 +      show "finite (S - {x. g x = 0})" using `finite S` by simp
1.330 +      show "S - {x. g x = 0} \<noteq> {}"
1.331 +      proof
1.332 +        assume "S - {x. g x = 0} = {}"
1.333 +        hence "(\<Sum>x\<in>S. g x) = 0" by (subst setsum_0') auto
1.334 +        with `(\<Sum>x\<in>S. g x) = 1` show False by simp
1.335 +      qed
1.336 +      fix x assume "x \<in> S - {x. g x = 0}"
1.337 +      thus "0 < f x" using g_pos[of x] pos(1)[of x] by auto
1.338 +    qed
1.339 +    finally show "0 < ?sum" .
1.340 +    show "(\<Sum>x\<in>S - {x. g x = 0}. f x) \<le> (\<Sum>x\<in>S. f x)"
1.341 +      using `finite S` pos by (auto intro!: setsum_mono2)
1.342    qed
1.343 +  finally show ?thesis .
1.344  qed
1.345
1.346 -lemma (in finite_prob_space) finite_prob_space:
1.347 -  "finite_prob_space \<lparr> space = X ` space M, sets = Pow (X ` space M), measure = distribution X\<rparr>"
1.348 -  (is "finite_prob_space ?S")
1.349 -  unfolding finite_prob_space_def prob_space_def prob_space_axioms_def
1.350 -proof safe
1.351 -  show "finite_measure_space ?S" by (rule finite_measure_space)
1.352 -  thus "measure_space ?S" by (simp add: finite_measure_space_def)
1.353 +lemma KL_divergence_positive_finite:
1.354 +  assumes u: "finite_prob_space (M\<lparr>measure := u\<rparr>)"
1.355 +  assumes v: "finite_prob_space (M\<lparr>measure := v\<rparr>)"
1.356 +  assumes u_0: "\<And>x. \<lbrakk> x \<in> space M ; v {x} = 0 \<rbrakk> \<Longrightarrow> u {x} = 0"
1.357 +  and "1 < b"
1.358 +  shows "0 \<le> KL_divergence b M u v"
1.359 +proof -
1.360 +  interpret u: finite_prob_space "M\<lparr>measure := u\<rparr>" using u .
1.361 +  interpret v: finite_prob_space "M\<lparr>measure := v\<rparr>" using v .
1.362
1.363 -  have "X -` X ` space M \<inter> space M = space M" by auto
1.364 -  thus "measure ?S (space ?S) = 1"
1.365 -    by (simp add: distribution_def prob_space)
1.366 -qed
1.367 +  have *: "space M \<noteq> {}" using u.not_empty by simp
1.368
1.369 -lemma (in finite_prob_space) finite_measure_space_image_prod:
1.370 -  "finite_measure_space \<lparr>space = X ` space M \<times> Y ` space M,
1.371 -    sets = Pow (X ` space M \<times> Y ` space M), measure_space.measure = distribution (\<lambda>x. (X x, Y x))\<rparr>"
1.372 -  (is "finite_measure_space ?Z")
1.374 -  show "finite (X ` space M \<times> Y ` space M)" using finite_space by simp
1.375 +  have "- (KL_divergence b M u v) \<le> log b (\<Sum>x\<in>space M. v {x})"
1.376 +  proof (subst KL_divergence_eq_finite, safe intro!: log_setsum_divide *)
1.377 +    show "finite_measure_space (M\<lparr>measure := u\<rparr>)"
1.378 +      "finite_measure_space (M\<lparr>measure := v\<rparr>)"
1.379 +       using u v unfolding finite_prob_space_eq by simp_all
1.380
1.381 -  let ?d = "distribution (\<lambda>x. (X x, Y x))"
1.382 +     show "finite (space M)" using u.finite_space by simp
1.383 +     show "1 < b" by fact
1.384 +     show "(\<Sum>x\<in>space M. u {x}) = 1" using u.sum_over_space_eq_1 by simp
1.385
1.386 -  show "positive ?Z ?d"
1.387 -    using sets_eq_Pow by (auto simp: positive_def distribution_def intro!: positive)
1.388 +     fix x assume x: "x \<in> space M"
1.389 +     thus pos: "0 \<le> u {x}" "0 \<le> v {x}"
1.390 +       using u.positive u.sets_eq_Pow v.positive v.sets_eq_Pow by simp_all
1.391
1.393 -  proof safe
1.394 -    fix x y assume "x \<in> sets ?Z" and "y \<in> sets ?Z"
1.395 -    assume "x \<inter> y = {}"
1.396 -    thus "?d (x \<union> y) = ?d x + ?d y"
1.397 -      apply (simp add: distribution_def)
1.398 -      apply (subst measure_additive[unfolded sets_eq_Pow, simplified])
1.399 -      by (auto simp: Un_Int_distrib Un_Int_distrib2 intro!: arg_cong[where f=prob])
1.400 +     { assume "v {x} = 0" from u_0[OF x this] show "u {x} = 0" . }
1.401 +     { assume "0 < u {x}"
1.402 +       hence "v {x} \<noteq> 0" using u_0[OF x] by auto
1.403 +       with pos show "0 < v {x}" by simp }
1.404    qed
1.405 +  thus "0 \<le> KL_divergence b M u v" using v.sum_over_space_eq_1 by simp
1.406  qed
1.407
1.408  definition (in prob_space)
1.409 @@ -174,346 +269,142 @@
1.410      in
1.411        KL_divergence b prod_space (joint_distribution X Y) (measure prod_space)"
1.412
1.413 -abbreviation (in finite_prob_space)
1.414 -  finite_mutual_information ("\<I>\<^bsub>_\<^esub>'(_ ; _')") where
1.415 -  "\<I>\<^bsub>b\<^esub>(X ; Y) \<equiv> mutual_information b
1.416 +abbreviation (in finite_information_space)
1.417 +  finite_mutual_information ("\<I>'(_ ; _')") where
1.418 +  "\<I>(X ; Y) \<equiv> mutual_information b
1.419      \<lparr> space = X`space M, sets = Pow (X`space M) \<rparr>
1.420      \<lparr> space = Y`space M, sets = Pow (Y`space M) \<rparr> X Y"
1.421
1.422 -abbreviation (in finite_prob_space)
1.423 -  finite_mutual_information_2 :: "('a \<Rightarrow> 'c) \<Rightarrow> ('a \<Rightarrow> 'd) \<Rightarrow> real" ("\<I>'(_ ; _')") where
1.424 -  "\<I>(X ; Y) \<equiv> \<I>\<^bsub>2\<^esub>(X ; Y)"
1.425 +lemma (in finite_measure_space) measure_spaceI: "measure_space M"
1.426 +  by unfold_locales
1.427
1.428 -lemma (in prob_space) mutual_information_cong:
1.429 -  assumes [simp]: "space S1 = space S3" "sets S1 = sets S3"
1.430 -    "space S2 = space S4" "sets S2 = sets S4"
1.431 -  shows "mutual_information b S1 S2 X Y = mutual_information b S3 S4 X Y"
1.432 -  unfolding mutual_information_def by simp
1.433 +lemma prod_measure_times_finite:
1.434 +  assumes fms: "finite_measure_space M" "finite_measure_space M'" and a: "a \<in> space M \<times> space M'"
1.435 +  shows "prod_measure M M' {a} = measure M {fst a} * measure M' {snd a}"
1.436 +proof (cases a)
1.437 +  case (Pair b c)
1.438 +  hence a_eq: "{a} = {b} \<times> {c}" by simp
1.439
1.440 -lemma (in prob_space) joint_distribution:
1.441 -  "joint_distribution X Y = distribution (\<lambda>x. (X x, Y x))"
1.442 -  unfolding joint_distribution_def_raw distribution_def_raw ..
1.443 +  with fms[THEN finite_measure_space.measure_spaceI]
1.444 +    fms[THEN finite_measure_space.sets_eq_Pow] a Pair
1.445 +  show ?thesis unfolding a_eq
1.446 +    by (subst prod_measure_times) simp_all
1.447 +qed
1.448
1.449 -lemma (in finite_prob_space) finite_mutual_information_reduce:
1.450 -  "\<I>\<^bsub>b\<^esub>(X;Y) = (\<Sum> (x,y) \<in> X ` space M \<times> Y ` space M.
1.451 -    distribution (\<lambda>x. (X x, Y x)) {(x,y)} * log b (distribution (\<lambda>x. (X x, Y x)) {(x,y)} /
1.452 -                                                   (distribution X {x} * distribution Y {y})))"
1.453 -  (is "_ = setsum ?log ?prod")
1.454 -  unfolding Let_def mutual_information_def KL_divergence_def
1.455 -proof (subst finite_measure_space.integral_finite_singleton, simp_all add: joint_distribution)
1.456 -  let ?X = "\<lparr>space = X ` space M, sets = Pow (X ` space M), measure_space.measure = distribution X\<rparr>"
1.457 -  let ?Y = "\<lparr>space = Y ` space M, sets = Pow (Y ` space M), measure_space.measure = distribution Y\<rparr>"
1.458 -  let ?P = "prod_measure_space ?X ?Y"
1.459 +lemma setsum_cartesian_product':
1.460 +  "(\<Sum>x\<in>A \<times> B. f x) = (\<Sum>x\<in>A. setsum (\<lambda>y. f (x, y)) B)"
1.461 +  unfolding setsum_cartesian_product by simp
1.462
1.463 -  interpret X: finite_measure_space "?X" by (rule finite_measure_space)
1.464 -  moreover interpret Y: finite_measure_space "?Y" by (rule finite_measure_space)
1.465 -  ultimately have ms_X: "measure_space ?X" and ms_Y: "measure_space ?Y" by unfold_locales
1.466 -
1.467 -  interpret P: finite_measure_space "?P" by (rule finite_measure_space_finite_prod_measure) (fact+)
1.468 -
1.469 -  let ?P' = "measure_update (\<lambda>_. distribution (\<lambda>x. (X x, Y x))) ?P"
1.470 -  from finite_measure_space_image_prod[of X Y]
1.471 -    sigma_prod_sets_finite[OF X.finite_space Y.finite_space]
1.472 -  show "finite_measure_space ?P'"
1.473 -    by (simp add: X.sets_eq_Pow Y.sets_eq_Pow joint_distribution finite_measure_space_def prod_measure_space_def)
1.474 +lemma (in finite_information_space)
1.475 +  assumes MX: "finite_prob_space \<lparr> space = space MX, sets = sets MX, measure = distribution X\<rparr>"
1.476 +    (is "finite_prob_space ?MX")
1.477 +  assumes MY: "finite_prob_space \<lparr> space = space MY, sets = sets MY, measure = distribution Y\<rparr>"
1.478 +    (is "finite_prob_space ?MY")
1.479 +  and X_space: "X ` space M \<subseteq> space MX" and Y_space: "Y ` space M \<subseteq> space MY"
1.480 +  shows mutual_information_eq_generic:
1.481 +    "mutual_information b MX MY X Y = (\<Sum> (x,y) \<in> space MX \<times> space MY.
1.482 +      joint_distribution X Y {(x,y)} *
1.483 +      log b (joint_distribution X Y {(x,y)} /
1.484 +      (distribution X {x} * distribution Y {y})))"
1.485 +    (is "?equality")
1.486 +  and mutual_information_positive_generic:
1.487 +    "0 \<le> mutual_information b MX MY X Y" (is "?positive")
1.488 +proof -
1.489 +  let ?P = "prod_measure_space ?MX ?MY"
1.490 +  let ?measure = "joint_distribution X Y"
1.491 +  let ?P' = "measure_update (\<lambda>_. ?measure) ?P"
1.492
1.493 -  show "(\<Sum>x \<in> space ?P. log b (measure_space.RN_deriv ?P (distribution (\<lambda>x. (X x, Y x))) x) * distribution (\<lambda>x. (X x, Y x)) {x})
1.494 -    = setsum ?log ?prod"
1.495 -  proof (rule setsum_cong)
1.496 -    show "space ?P = ?prod" unfolding prod_measure_space_def by simp
1.497 -  next
1.498 -    fix x assume x: "x \<in> X ` space M \<times> Y ` space M"
1.499 -    then obtain d e where x_Pair: "x = (d, e)"
1.500 -      and d: "d \<in> X ` space M"
1.501 -      and e: "e \<in> Y ` space M" by auto
1.502 -
1.503 -    { fix x assume m_0: "measure ?P {x} = 0"
1.504 -      have "distribution (\<lambda>x. (X x, Y x)) {x} = 0"
1.505 -      proof (cases x)
1.506 -        case (Pair a b)
1.507 -        hence "(\<lambda>x. (X x, Y x)) -` {x} \<inter> space M = (X -` {a} \<inter> space M) \<inter> (Y -` {b} \<inter> space M)"
1.508 -          and x_prod: "{x} = {a} \<times> {b}" by auto
1.509 +  interpret X: finite_prob_space "?MX" using MX .
1.510 +  moreover interpret Y: finite_prob_space "?MY" using MY .
1.511 +  ultimately have ms_X: "measure_space ?MX"
1.512 +    and ms_Y: "measure_space ?MY" by unfold_locales
1.513
1.514 -        let ?PROD = "(\<lambda>x. (X x, Y x)) -` {x} \<inter> space M"
1.515 +  have fms_P: "finite_measure_space ?P"
1.516 +      by (rule finite_measure_space_finite_prod_measure) fact+
1.517 +
1.518 +  have fms_P': "finite_measure_space ?P'"
1.519 +      using finite_product_measure_space[of "space MX" "space MY"]
1.520 +        X.finite_space Y.finite_space sigma_prod_sets_finite[OF X.finite_space Y.finite_space]
1.521 +        X.sets_eq_Pow Y.sets_eq_Pow
1.522 +      by (simp add: prod_measure_space_def)
1.523
1.524 -        show ?thesis
1.525 -        proof (cases "{a} \<subseteq> X ` space M \<and> {b} \<subseteq> Y ` space M")
1.526 -          case False
1.527 -          hence "?PROD = {}"
1.528 -            unfolding Pair by auto
1.529 -          thus ?thesis by (auto simp: distribution_def)
1.530 -        next
1.531 -          have [intro]: "prob ?PROD \<le> 0 \<Longrightarrow> prob ?PROD = 0"
1.532 -            using sets_eq_Pow by (auto intro!: positive real_le_antisym[of _ 0])
1.533 +  { fix x assume "x \<in> space ?P"
1.534 +    hence x_in_MX: "{fst x} \<in> sets MX" using X.sets_eq_Pow
1.535 +      by (auto simp: prod_measure_space_def)
1.536 +
1.537 +    assume "measure ?P {x} = 0"
1.538 +    with prod_measure_times[OF ms_X ms_Y, of "{fst x}" "{snd x}"] x_in_MX
1.539 +    have "distribution X {fst x} = 0 \<or> distribution Y {snd x} = 0"
1.540 +      by (simp add: prod_measure_space_def)
1.541 +
1.542 +    hence "joint_distribution X Y {x} = 0"
1.543 +      by (cases x) (auto simp: distribution_order) }
1.544 +  note measure_0 = this
1.545
1.546 -          case True
1.547 -          with prod_measure_times[OF ms_X ms_Y, simplified, of "{a}" "{b}"]
1.548 -          have "prob (X -` {a} \<inter> space M) = 0 \<or> prob (Y -` {b} \<inter> space M) = 0" (is "?X_0 \<or> ?Y_0") using m_0
1.549 -            by (simp add: prod_measure_space_def distribution_def Pair)
1.550 -          thus ?thesis
1.551 -          proof (rule disjE)
1.552 -            assume ?X_0
1.553 -            have "prob ?PROD \<le> prob (X -` {a} \<inter> space M)"
1.554 -              using sets_eq_Pow Pair by (auto intro!: measure_mono)
1.555 -            thus ?thesis using `?X_0` by (auto simp: distribution_def)
1.556 -          next
1.557 -            assume ?Y_0
1.558 -            have "prob ?PROD \<le> prob (Y -` {b} \<inter> space M)"
1.559 -              using sets_eq_Pow Pair by (auto intro!: measure_mono)
1.560 -            thus ?thesis using `?Y_0` by (auto simp: distribution_def)
1.561 -          qed
1.562 -        qed
1.563 -      qed }
1.564 -    note measure_zero_joint_distribution = this
1.565 +  show ?equality
1.566 +    unfolding Let_def mutual_information_def using fms_P fms_P' measure_0 MX MY
1.567 +    by (subst KL_divergence_eq_finite)
1.568 +       (simp_all add: prod_measure_space_def prod_measure_times_finite
1.569 +         finite_prob_space_eq setsum_cartesian_product')
1.570
1.571 -    show "log b (measure_space.RN_deriv ?P (distribution (\<lambda>x. (X x, Y x))) x) * distribution (\<lambda>x. (X x, Y x)) {x} = ?log x"
1.572 -    apply (cases "distribution (\<lambda>x. (X x, Y x)) {x} \<noteq> 0")
1.573 -    apply (subst P.RN_deriv_finite_singleton)
1.574 -    proof (simp_all add: x_Pair)
1.575 -      from `finite_measure_space ?P'` show "measure_space ?P'" by (simp add: finite_measure_space_def)
1.576 -    next
1.577 -      fix x assume m_0: "measure ?P {x} = 0" thus "distribution (\<lambda>x. (X x, Y x)) {x} = 0" by fact
1.578 -    next
1.579 -      show "(d,e) \<in> space ?P" unfolding prod_measure_space_def using x x_Pair by simp
1.580 -    next
1.581 -      assume jd_0: "distribution (\<lambda>x. (X x, Y x)) {(d, e)} \<noteq> 0"
1.582 -      show "measure ?P {(d,e)} \<noteq> 0"
1.583 -      proof
1.584 -        assume "measure ?P {(d,e)} = 0"
1.585 -        from measure_zero_joint_distribution[OF this] jd_0
1.586 -        show False by simp
1.587 -      qed
1.588 -    next
1.589 -      assume jd_0: "distribution (\<lambda>x. (X x, Y x)) {(d, e)} \<noteq> 0"
1.590 -      with prod_measure_times[OF ms_X ms_Y, simplified, of "{d}" "{e}"] d
1.591 -      show "log b (distribution (\<lambda>x. (X x, Y x)) {(d, e)} / measure ?P {(d, e)}) =
1.592 -        log b (distribution (\<lambda>x. (X x, Y x)) {(d, e)} / (distribution X {d} * distribution Y {e}))"
1.593 -        by (auto intro!: arg_cong[where f="log b"] simp: prod_measure_space_def)
1.594 -    qed
1.595 +  show ?positive
1.596 +    unfolding Let_def mutual_information_def using measure_0 b_gt_1
1.597 +  proof (safe intro!: KL_divergence_positive_finite, simp_all)
1.598 +    from ms_X ms_Y X.top Y.top X.prob_space Y.prob_space
1.599 +    have "measure ?P (space ?P) = 1"
1.600 +      by (simp add: prod_measure_space_def, subst prod_measure_times, simp_all)
1.601 +    with fms_P show "finite_prob_space ?P"
1.602 +      by (simp add: finite_prob_space_eq)
1.603 +
1.604 +    from ms_X ms_Y X.top Y.top X.prob_space Y.prob_space Y.not_empty X_space Y_space
1.605 +    have "measure ?P' (space ?P') = 1" unfolding prob_space[symmetric]
1.606 +      by (auto simp add: prod_measure_space_def distribution_def vimage_Times comp_def
1.607 +        intro!: arg_cong[where f=prob])
1.608 +    with fms_P' show "finite_prob_space ?P'"
1.609 +      by (simp add: finite_prob_space_eq)
1.610    qed
1.611  qed
1.612
1.613 -lemma (in finite_prob_space) distribution_log_split:
1.614 -  assumes "1 < b"
1.615 -  shows
1.616 -  "distribution (\<lambda>x. (X x, Z x)) {(X x, z)} * log b (distribution (\<lambda>x. (X x, Z x)) {(X x, z)} /
1.617 -                                                     (distribution X {X x} * distribution Z {z})) =
1.618 -   distribution (\<lambda>x. (X x, Z x)) {(X x, z)} * log b (distribution (\<lambda>x. (X x, Z x)) {(X x, z)} /
1.619 -                                                     distribution Z {z}) -
1.620 -   distribution (\<lambda>x. (X x, Z x)) {(X x, z)} * log b (distribution X {X x})"
1.621 -  (is "?lhs = ?rhs")
1.622 -proof (cases "distribution (\<lambda>x. (X x, Z x)) {(X x, z)} = 0")
1.623 -  case True thus ?thesis by simp
1.624 -next
1.625 -  case False
1.626 -
1.627 -  let ?dZ = "distribution Z"
1.628 -  let ?dX = "distribution X"
1.629 -  let ?dXZ = "distribution (\<lambda>x. (X x, Z x))"
1.630 -
1.631 -  have dist_nneg: "\<And>x X. 0 \<le> distribution X x"
1.632 -    unfolding distribution_def using sets_eq_Pow by (auto intro: positive)
1.633 -
1.634 -  have "?lhs = ?dXZ {(X x, z)} * (log b (?dXZ {(X x, z)} / ?dZ {z}) - log b (?dX {X x}))"
1.635 -  proof -
1.636 -    have pos_dXZ: "0 < ?dXZ {(X x, z)}"
1.637 -      using False dist_nneg[of "\<lambda>x. (X x, Z x)" "{(X x, z)}"] by auto
1.638 -    moreover
1.639 -    have "((\<lambda>x. (X x, Z x)) -` {(X x, z)}) \<inter> space M \<subseteq> (X -` {X x}) \<inter> space M" by auto
1.640 -    hence "?dXZ {(X x, z)} \<le> ?dX {X x}"
1.641 -      unfolding distribution_def
1.642 -      by (rule measure_mono) (simp_all add: sets_eq_Pow)
1.643 -    with pos_dXZ have "0 < ?dX {X x}" by (rule less_le_trans)
1.644 -    moreover
1.645 -    have "((\<lambda>x. (X x, Z x)) -` {(X x, z)}) \<inter> space M \<subseteq> (Z -` {z}) \<inter> space M" by auto
1.646 -    hence "?dXZ {(X x, z)} \<le> ?dZ {z}"
1.647 -      unfolding distribution_def
1.648 -      by (rule measure_mono) (simp_all add: sets_eq_Pow)
1.649 -    with pos_dXZ have "0 < ?dZ {z}" by (rule less_le_trans)
1.650 -    moreover have "0 < b" by (rule less_trans[OF _ `1 < b`]) simp
1.651 -    moreover have "b \<noteq> 1" by (rule ccontr) (insert `1 < b`, simp)
1.652 -    ultimately show ?thesis
1.653 -      using pos_dXZ
1.654 -      apply (subst (2) mult_commute)
1.655 -      apply (subst divide_divide_eq_left[symmetric])
1.656 -      apply (subst log_divide)
1.657 -      by (auto intro: divide_pos_pos)
1.658 -  qed
1.659 -  also have "... = ?rhs"
1.660 -    by (simp add: field_simps)
1.661 -  finally show ?thesis .
1.662 -qed
1.663 -
1.664 -lemma (in finite_prob_space) finite_mutual_information_reduce_prod:
1.665 -  "mutual_information b
1.666 -    \<lparr> space = X ` space M, sets = Pow (X ` space M) \<rparr>
1.667 -    \<lparr> space = Y ` space M \<times> Z ` space M, sets = Pow (Y ` space M \<times> Z ` space M) \<rparr>
1.668 -    X (\<lambda>x. (Y x,Z x)) =
1.669 -    (\<Sum> (x, y, z) \<in> X`space M \<times> Y`space M \<times> Z`space M.
1.670 -      distribution (\<lambda>x. (X x, Y x,Z x)) {(x, y, z)} *
1.671 -      log b (distribution (\<lambda>x. (X x, Y x,Z x)) {(x, y, z)} /
1.672 -              (distribution X {x} * distribution (\<lambda>x. (Y x,Z x)) {(y,z)})))" (is "_ = setsum ?log ?space")
1.673 -  unfolding Let_def mutual_information_def KL_divergence_def using finite_space
1.674 -proof (subst finite_measure_space.integral_finite_singleton,
1.675 -       simp_all add: prod_measure_space_def sigma_prod_sets_finite joint_distribution)
1.676 -  let ?sets = "Pow (X ` space M \<times> Y ` space M \<times> Z ` space M)"
1.677 -    and ?measure = "distribution (\<lambda>x. (X x, Y x, Z x))"
1.678 -  let ?P = "\<lparr> space = ?space, sets = ?sets, measure = ?measure\<rparr>"
1.679 -
1.680 -  show "finite_measure_space ?P"
1.681 -  proof (rule finite_Pow_additivity_sufficient, simp_all)
1.682 -    show "finite ?space" using finite_space by auto
1.683 -
1.684 -    show "positive ?P ?measure"
1.685 -      using sets_eq_Pow by (auto simp: positive_def distribution_def intro!: positive)
1.686 -
1.687 -    show "additive ?P ?measure"
1.689 -      fix x y assume "x \<subseteq> ?space" and "y \<subseteq> ?space"
1.690 -      assume "x \<inter> y = {}"
1.691 -      thus "prob (((\<lambda>x. (X x, Y x, Z x)) -` x \<union> (\<lambda>x. (X x, Y x, Z x)) -` y) \<inter> space M) =
1.692 -            prob ((\<lambda>x. (X x, Y x, Z x)) -` x \<inter> space M) + prob ((\<lambda>x. (X x, Y x, Z x)) -` y \<inter> space M)"
1.693 -        apply (subst measure_additive[unfolded sets_eq_Pow, simplified])
1.694 -        by (auto simp: Un_Int_distrib Un_Int_distrib2 intro!: arg_cong[where f=prob])
1.695 -    qed
1.696 -  qed
1.697 +lemma (in finite_information_space) mutual_information_eq:
1.698 +  "\<I>(X;Y) = (\<Sum> (x,y) \<in> X ` space M \<times> Y ` space M.
1.699 +    distribution (\<lambda>x. (X x, Y x)) {(x,y)} * log b (distribution (\<lambda>x. (X x, Y x)) {(x,y)} /
1.700 +                                                   (distribution X {x} * distribution Y {y})))"
1.701 +  by (subst mutual_information_eq_generic) (simp_all add: finite_prob_space_of_images)
1.702
1.703 -  let ?X = "\<lparr>space = X ` space M, sets = Pow (X ` space M), measure = distribution X\<rparr>"
1.704 -  and ?YZ = "\<lparr>space = Y ` space M \<times> Z ` space M, sets = Pow (Y ` space M \<times> Z ` space M), measure = distribution (\<lambda>x. (Y x, Z x))\<rparr>"
1.705 -  let ?u = "prod_measure ?X ?YZ"
1.706 -
1.707 -  from finite_measure_space[of X] finite_measure_space_image_prod[of Y Z]
1.708 -  have ms_X: "measure_space ?X" and ms_YZ: "measure_space ?YZ"
1.709 -    by (simp_all add: finite_measure_space_def)
1.710 -
1.711 -  show "(\<Sum>x \<in> ?space. log b (measure_space.RN_deriv \<lparr>space=?space, sets=?sets, measure=?u\<rparr>
1.712 -    (distribution (\<lambda>x. (X x, Y x, Z x))) x) * distribution (\<lambda>x. (X x, Y x, Z x)) {x})
1.713 -    = setsum ?log ?space"
1.714 -  proof (rule setsum_cong)
1.715 -    fix x assume x: "x \<in> ?space"
1.716 -    then obtain d e f where x_Pair: "x = (d, e, f)"
1.717 -      and d: "d \<in> X ` space M"
1.718 -      and e: "e \<in> Y ` space M"
1.719 -      and f: "f \<in> Z ` space M" by auto
1.720 -
1.721 -    { fix x assume m_0: "?u {x} = 0"
1.722 -
1.723 -      let ?PROD = "(\<lambda>x. (X x, Y x, Z x)) -` {x} \<inter> space M"
1.724 -      obtain a b c where Pair: "x = (a, b, c)" by (cases x)
1.725 -      hence "?PROD = (X -` {a} \<inter> space M) \<inter> ((\<lambda>x. (Y x, Z x)) -` {(b, c)} \<inter> space M)"
1.726 -        and x_prod: "{x} = {a} \<times> {(b, c)}" by auto
1.727 -
1.728 -      have "distribution (\<lambda>x. (X x, Y x, Z x)) {x} = 0"
1.729 -      proof (cases "{a} \<subseteq> X ` space M")
1.730 -        case False
1.731 -        hence "?PROD = {}"
1.732 -          unfolding Pair by auto
1.733 -        thus ?thesis by (auto simp: distribution_def)
1.734 -      next
1.735 -        have [intro]: "prob ?PROD \<le> 0 \<Longrightarrow> prob ?PROD = 0"
1.736 -          using sets_eq_Pow by (auto intro!: positive real_le_antisym[of _ 0])
1.737 -
1.738 -        case True
1.739 -        with prod_measure_times[OF ms_X ms_YZ, simplified, of "{a}" "{(b,c)}"]
1.740 -        have "prob (X -` {a} \<inter> space M) = 0 \<or> prob ((\<lambda>x. (Y x, Z x)) -` {(b, c)} \<inter> space M) = 0"
1.741 -          (is "prob ?X = 0 \<or> prob ?Y = 0") using m_0
1.742 -          by (simp add: prod_measure_space_def distribution_def Pair)
1.743 -        thus ?thesis
1.744 -        proof (rule disjE)
1.745 -          assume "prob ?X = 0"
1.746 -          have "prob ?PROD \<le> prob ?X"
1.747 -            using sets_eq_Pow Pair by (auto intro!: measure_mono)
1.748 -          thus ?thesis using `prob ?X = 0` by (auto simp: distribution_def)
1.749 -        next
1.750 -          assume "prob ?Y = 0"
1.751 -          have "prob ?PROD \<le> prob ?Y"
1.752 -            using sets_eq_Pow Pair by (auto intro!: measure_mono)
1.753 -          thus ?thesis using `prob ?Y = 0` by (auto simp: distribution_def)
1.754 -        qed
1.755 -      qed }
1.756 -    note measure_zero_joint_distribution = this
1.757 -
1.758 -    from x_Pair d e f finite_space
1.759 -    show "log b (measure_space.RN_deriv \<lparr>space=?space, sets=?sets, measure=?u\<rparr>
1.760 -      (distribution (\<lambda>x. (X x, Y x, Z x))) x) * distribution (\<lambda>x. (X x, Y x, Z x)) {x} = ?log x"
1.761 -    apply (cases "distribution (\<lambda>x. (X x, Y x, Z x)) {x} \<noteq> 0")
1.762 -    apply (subst finite_measure_space.RN_deriv_finite_singleton)
1.763 -    proof simp_all
1.764 -      show "measure_space ?P" using `finite_measure_space ?P` by (simp add: finite_measure_space_def)
1.765 -
1.766 -      from finite_measure_space_finite_prod_measure[OF finite_measure_space[of X]
1.767 -        finite_measure_space_image_prod[of Y Z]] finite_space
1.768 -      show "finite_measure_space \<lparr>space=?space, sets=?sets, measure=?u\<rparr>"
1.769 -        by (simp add: prod_measure_space_def sigma_prod_sets_finite)
1.770 -    next
1.771 -      fix x assume "?u {x} = 0" thus "distribution (\<lambda>x. (X x, Y x, Z x)) {x} = 0" by fact
1.772 -    next
1.773 -      assume jd_0: "distribution (\<lambda>x. (X x, Y x, Z x)) {(d, e, f)} \<noteq> 0"
1.774 -      show "?u {(d,e,f)} \<noteq> 0"
1.775 -      proof
1.776 -        assume "?u {(d, e, f)} = 0"
1.777 -        from measure_zero_joint_distribution[OF this] jd_0
1.778 -        show False by simp
1.779 -      qed
1.780 -    next
1.781 -      assume jd_0: "distribution (\<lambda>x. (X x, Y x, Z x)) {(d, e, f)} \<noteq> 0"
1.782 -      with prod_measure_times[OF ms_X ms_YZ, simplified, of "{d}" "{(e,f)}"] d
1.783 -      show "log b (distribution (\<lambda>x. (X x, Y x, Z x)) {(d, e, f)} / ?u {(d, e, f)}) =
1.784 -        log b (distribution (\<lambda>x. (X x, Y x, Z x)) {(d, e, f)} / (distribution X {d} * distribution (\<lambda>x. (Y x, Z x)) {(e,f)}))"
1.785 -        by (auto intro!: arg_cong[where f="log b"] simp: prod_measure_space_def)
1.786 -    qed
1.787 -  qed simp
1.788 -qed
1.789 +lemma (in finite_information_space) mutual_information_positive: "0 \<le> \<I>(X;Y)"
1.790 +  by (subst mutual_information_positive_generic) (simp_all add: finite_prob_space_of_images)
1.791
1.792  definition (in prob_space)
1.793    "entropy b s X = mutual_information b s s X X"
1.794
1.795 -abbreviation (in finite_prob_space)
1.796 -  finite_entropy ("\<H>\<^bsub>_\<^esub>'(_')") where
1.797 -  "\<H>\<^bsub>b\<^esub>(X) \<equiv> entropy b \<lparr> space = X`space M, sets = Pow (X`space M) \<rparr> X"
1.798 -
1.799 -abbreviation (in finite_prob_space)
1.800 -  finite_entropy_2 ("\<H>'(_')") where
1.801 -  "\<H>(X) \<equiv> \<H>\<^bsub>2\<^esub>(X)"
1.802 +abbreviation (in finite_information_space)
1.803 +  finite_entropy ("\<H>'(_')") where
1.804 +  "\<H>(X) \<equiv> entropy b \<lparr> space = X`space M, sets = Pow (X`space M) \<rparr> X"
1.805
1.806 -lemma (in finite_prob_space) finite_entropy_reduce:
1.807 -  assumes "1 < b"
1.808 -  shows "\<H>\<^bsub>b\<^esub>(X) = -(\<Sum> x \<in> X ` space M. distribution X {x} * log b (distribution X {x}))"
1.809 +lemma (in finite_information_space) joint_distribution_remove[simp]:
1.810 +    "joint_distribution X X {(x, x)} = distribution X {x}"
1.811 +  unfolding distribution_def by (auto intro!: arg_cong[where f=prob])
1.812 +
1.813 +lemma (in finite_information_space) entropy_eq:
1.814 +  "\<H>(X) = -(\<Sum> x \<in> X ` space M. distribution X {x} * log b (distribution X {x}))"
1.815  proof -
1.816 -  have fin: "finite (X ` space M)" using finite_space by simp
1.817 -
1.818 -  have If_mult_distr: "\<And>A B C D. If A B C * D = If A (B * D) (C * D)" by auto
1.819 -
1.820 +  { fix f
1.821    { fix x y
1.822      have "(\<lambda>x. (X x, X x)) -` {(x, y)} = (if x = y then X -` {x} else {})" by auto
1.823 -    hence "distribution (\<lambda>x. (X x, X x))  {(x,y)} = (if x = y then distribution X {x} else 0)"
1.824 +      hence "distribution (\<lambda>x. (X x, X x))  {(x,y)} * f x y = (if x = y then distribution X {x} * f x y else 0)"
1.825        unfolding distribution_def by auto }
1.826 -  moreover
1.827 -  have "\<And>x. 0 \<le> distribution X x"
1.828 -    unfolding distribution_def using finite_space sets_eq_Pow by (auto intro: positive)
1.829 -  hence "\<And>x. distribution X x \<noteq> 0 \<Longrightarrow> 0 < distribution X x" by (auto simp: le_less)
1.830 -  ultimately
1.831 -  show ?thesis using `1 < b`
1.832 -    by (auto intro!: setsum_cong
1.833 -      simp: log_inverse If_mult_distr setsum_cases[OF fin] inverse_eq_divide[symmetric]
1.834 -        entropy_def setsum_negf[symmetric] joint_distribution finite_mutual_information_reduce
1.835 -        setsum_cartesian_product[symmetric])
1.836 +    hence "(\<Sum>(x, y) \<in> X ` space M \<times> X ` space M. joint_distribution X X {(x, y)} * f x y) =
1.837 +      (\<Sum>x \<in> X ` space M. distribution X {x} * f x x)"
1.838 +      unfolding setsum_cartesian_product' by (simp add: setsum_cases finite_space) }
1.839 +  note remove_cartesian_product = this
1.840 +
1.841 +  show ?thesis
1.842 +    unfolding entropy_def mutual_information_eq setsum_negf[symmetric] remove_cartesian_product
1.843 +    by (auto intro!: setsum_cong)
1.844  qed
1.845
1.846 -lemma log_inj: assumes "1 < b" shows "inj_on (log b) {0 <..}"
1.847 -proof (rule inj_onI, simp)
1.848 -  fix x y assume pos: "0 < x" "0 < y" and *: "log b x = log b y"
1.849 -  show "x = y"
1.850 -  proof (cases rule: linorder_cases)
1.851 -    assume "x < y" hence "log b x < log b y"
1.852 -      using log_less_cancel_iff[OF `1 < b`] pos by simp
1.853 -    thus ?thesis using * by simp
1.854 -  next
1.855 -    assume "y < x" hence "log b y < log b x"
1.856 -      using log_less_cancel_iff[OF `1 < b`] pos by simp
1.857 -    thus ?thesis using * by simp
1.858 -  qed simp
1.859 -qed
1.860 +lemma (in finite_information_space) entropy_positive: "0 \<le> \<H>(X)"
1.861 +  unfolding entropy_def using mutual_information_positive .
1.862
1.863  definition (in prob_space)
1.864    "conditional_mutual_information b s1 s2 s3 X Y Z \<equiv>
1.865 @@ -524,160 +415,181 @@
1.866        mutual_information b s1 prod_space X (\<lambda>x. (Y x, Z x)) -
1.867        mutual_information b s1 s3 X Z"
1.868
1.869 -abbreviation (in finite_prob_space)
1.870 -  finite_conditional_mutual_information ("\<I>\<^bsub>_\<^esub>'( _ ; _ | _ ')") where
1.871 -  "\<I>\<^bsub>b\<^esub>(X ; Y | Z) \<equiv> conditional_mutual_information b
1.872 +abbreviation (in finite_information_space)
1.873 +  finite_conditional_mutual_information ("\<I>'( _ ; _ | _ ')") where
1.874 +  "\<I>(X ; Y | Z) \<equiv> conditional_mutual_information b
1.875      \<lparr> space = X`space M, sets = Pow (X`space M) \<rparr>
1.876      \<lparr> space = Y`space M, sets = Pow (Y`space M) \<rparr>
1.877      \<lparr> space = Z`space M, sets = Pow (Z`space M) \<rparr>
1.878      X Y Z"
1.879
1.880 -abbreviation (in finite_prob_space)
1.881 -  finite_conditional_mutual_information_2 ("\<I>'( _ ; _ | _ ')") where
1.882 -  "\<I>(X ; Y | Z) \<equiv> \<I>\<^bsub>2\<^esub>(X ; Y | Z)"
1.883 +lemma (in finite_information_space) setsum_distribution_gen:
1.884 +  assumes "Z -` {c} \<inter> space M = (\<Union>x \<in> X`space M. Y -` {f x}) \<inter> space M"
1.885 +  and "inj_on f (X`space M)"
1.886 +  shows "(\<Sum>x \<in> X`space M. distribution Y {f x}) = distribution Z {c}"
1.887 +  unfolding distribution_def assms
1.888 +  using finite_space assms
1.890 +     (auto simp add: disjoint_family_on_def sets_eq_Pow inj_on_def
1.891 +      intro!: arg_cong[where f=prob])
1.892 +
1.893 +lemma (in finite_information_space) setsum_distribution:
1.894 +  "(\<Sum>x \<in> X`space M. joint_distribution X Y {(x, y)}) = distribution Y {y}"
1.895 +  "(\<Sum>y \<in> Y`space M. joint_distribution X Y {(x, y)}) = distribution X {x}"
1.896 +  "(\<Sum>x \<in> X`space M. joint_distribution X (\<lambda>x. (Y x, Z x)) {(x, y, z)}) = joint_distribution Y Z {(y, z)}"
1.897 +  "(\<Sum>y \<in> Y`space M. joint_distribution X (\<lambda>x. (Y x, Z x)) {(x, y, z)}) = joint_distribution X Z {(x, z)}"
1.898 +  "(\<Sum>z \<in> Z`space M. joint_distribution X (\<lambda>x. (Y x, Z x)) {(x, y, z)}) = joint_distribution X Y {(x, y)}"
1.899 +  by (auto intro!: inj_onI setsum_distribution_gen)
1.900
1.901 -lemma image_pair_eq_Sigma:
1.902 -  "(\<lambda>x. (f x, g x)) ` A = Sigma (f ` A) (\<lambda>x. g ` (f -` {x} \<inter> A))"
1.903 -proof (safe intro!: imageI vimageI, simp_all)
1.904 -  fix a b assume *: "a \<in> A" "b \<in> A" and eq: "f a = f b"
1.905 -  show "(f b, g a) \<in> (\<lambda>x. (f x, g x)) ` A" unfolding eq[symmetric]
1.906 -    using * by auto
1.907 +lemma (in finite_information_space) conditional_mutual_information_eq_sum:
1.908 +   "\<I>(X ; Y | Z) =
1.909 +     (\<Sum>(x, y, z)\<in>X ` space M \<times> (\<lambda>x. (Y x, Z x)) ` space M.
1.910 +             distribution (\<lambda>x. (X x, Y x, Z x)) {(x, y, z)} *
1.911 +             log b (distribution (\<lambda>x. (X x, Y x, Z x)) {(x, y, z)}/
1.912 +        distribution (\<lambda>x. (Y x, Z x)) {(y, z)})) -
1.913 +     (\<Sum>(x, z)\<in>X ` space M \<times> Z ` space M.
1.914 +        distribution (\<lambda>x. (X x, Z x)) {(x,z)} * log b (distribution (\<lambda>x. (X x, Z x)) {(x,z)} / distribution Z {z}))"
1.915 +  (is "_ = ?rhs")
1.916 +proof -
1.917 +  have setsum_product:
1.918 +    "\<And>f x. (\<Sum>v\<in>(\<lambda>x. (Y x, Z x)) ` space M. joint_distribution X (\<lambda>x. (Y x, Z x)) {(x,v)} * f v)
1.919 +      = (\<Sum>v\<in>Y ` space M \<times> Z ` space M. joint_distribution X (\<lambda>x. (Y x, Z x)) {(x,v)} * f v)"
1.920 +  proof (safe intro!: setsum_mono_zero_cong_left imageI)
1.921 +    fix x y z f
1.922 +    assume *: "(Y y, Z z) \<notin> (\<lambda>x. (Y x, Z x)) ` space M" and "y \<in> space M" "z \<in> space M"
1.923 +    hence "(\<lambda>x. (X x, Y x, Z x)) -` {(x, Y y, Z z)} \<inter> space M = {}"
1.924 +    proof safe
1.925 +      fix x' assume x': "x' \<in> space M" and eq: "Y x' = Y y" "Z x' = Z z"
1.926 +      have "(Y y, Z z) \<in> (\<lambda>x. (Y x, Z x)) ` space M" using eq[symmetric] x' by auto
1.927 +      thus "x' \<in> {}" using * by auto
1.928 +    qed
1.929 +    thus "joint_distribution X (\<lambda>x. (Y x, Z x)) {(x, Y y, Z z)} * f (Y y) (Z z) = 0"
1.930 +      unfolding distribution_def by simp
1.931 +  qed (simp add: finite_space)
1.932 +
1.933 +  thus ?thesis
1.934 +    unfolding conditional_mutual_information_def Let_def mutual_information_eq
1.935 +    apply (subst mutual_information_eq_generic)
1.936 +    by (auto simp add: prod_measure_space_def sigma_prod_sets_finite finite_space
1.937 +        finite_prob_space_of_images finite_product_prob_space_of_images
1.938 +        setsum_cartesian_product' setsum_product setsum_subtractf setsum_addf
1.939 +        setsum_left_distrib[symmetric] setsum_distribution
1.940 +      cong: setsum_cong)
1.941  qed
1.942
1.943 -lemma inj_on_swap: "inj_on (\<lambda>(x,y). (y,x)) A" by (auto intro!: inj_onI)
1.944 -
1.945 -lemma (in finite_prob_space) finite_conditional_mutual_information_reduce:
1.946 -  assumes "1 < b"
1.947 -  shows "\<I>\<^bsub>b\<^esub>(X ; Y | Z) =
1.948 -	- (\<Sum> (x, z) \<in> (X ` space M \<times> Z ` space M).
1.949 -             distribution (\<lambda>x. (X x, Z x)) {(x,z)} * log b (distribution (\<lambda>x. (X x, Z x)) {(x,z)} / distribution Z {z}))
1.950 -	+ (\<Sum> (x, y, z) \<in> (X ` space M \<times> (\<lambda>x. (Y x, Z x)) ` space M).
1.951 +lemma (in finite_information_space) conditional_mutual_information_eq:
1.952 +  "\<I>(X ; Y | Z) = (\<Sum>(x, y, z) \<in> X ` space M \<times> Y ` space M \<times> Z ` space M.
1.953               distribution (\<lambda>x. (X x, Y x, Z x)) {(x, y, z)} *
1.954               log b (distribution (\<lambda>x. (X x, Y x, Z x)) {(x, y, z)}/
1.955 -             distribution (\<lambda>x. (Y x, Z x)) {(y, z)}))" (is "_ = ?rhs")
1.956 -unfolding conditional_mutual_information_def Let_def using finite_space
1.957 -apply (simp add: prod_measure_space_def sigma_prod_sets_finite)
1.958 -apply (subst mutual_information_cong[of _ "\<lparr>space = X ` space M, sets = Pow (X ` space M)\<rparr>"
1.959 -  _ "\<lparr>space = Y ` space M \<times> Z ` space M, sets = Pow (Y ` space M \<times> Z ` space M)\<rparr>"], simp_all)
1.960 -apply (subst finite_mutual_information_reduce_prod, simp_all)
1.961 -apply (subst finite_mutual_information_reduce, simp_all)
1.962 +    (joint_distribution X Z {(x, z)} * joint_distribution Y Z {(y,z)} / distribution Z {z})))"
1.963 +  unfolding conditional_mutual_information_def Let_def mutual_information_eq
1.964 +    apply (subst mutual_information_eq_generic)
1.965 +  by (auto simp add: prod_measure_space_def sigma_prod_sets_finite finite_space
1.966 +      finite_prob_space_of_images finite_product_prob_space_of_images
1.967 +      setsum_cartesian_product' setsum_product setsum_subtractf setsum_addf
1.968 +      setsum_left_distrib[symmetric] setsum_distribution setsum_commute[where A="Y`space M"]
1.969 +    cong: setsum_cong)
1.970 +
1.971 +lemma (in finite_information_space) conditional_mutual_information_eq_mutual_information:
1.972 +  "\<I>(X ; Y) = \<I>(X ; Y | (\<lambda>x. ()))"
1.973 +proof -
1.974 +  have [simp]: "(\<lambda>x. ()) ` space M = {()}" using not_empty by auto
1.975 +
1.976 +  show ?thesis
1.977 +    unfolding conditional_mutual_information_eq mutual_information_eq
1.978 +    by (simp add: setsum_cartesian_product' distribution_remove_const)
1.979 +qed
1.980 +
1.981 +lemma (in finite_information_space) conditional_mutual_information_positive:
1.982 +  "0 \<le> \<I>(X ; Y | Z)"
1.983  proof -
1.984    let ?dXYZ = "distribution (\<lambda>x. (X x, Y x, Z x))"
1.985 -  let ?dXZ = "distribution (\<lambda>x. (X x, Z x))"
1.986 -  let ?dYZ = "distribution (\<lambda>x. (Y x, Z x))"
1.987 +  let ?dXZ = "joint_distribution X Z"
1.988 +  let ?dYZ = "joint_distribution Y Z"
1.989    let ?dX = "distribution X"
1.990 -  let ?dY = "distribution Y"
1.991    let ?dZ = "distribution Z"
1.992 +  let ?M = "X ` space M \<times> Y ` space M \<times> Z ` space M"
1.993 +
1.994 +  have split_beta: "\<And>f. split f = (\<lambda>x. f (fst x) (snd x))" by (simp add: expand_fun_eq)
1.995
1.996 -  have If_mult_distr: "\<And>A B C D. If A B C * D = If A (B * D) (C * D)" by auto
1.997 -  { fix x y
1.998 -    have "(\<lambda>x. (X x, Y x, Z x)) -` {(X x, y)} \<inter> space M =
1.999 -      (if y \<in> (\<lambda>x. (Y x, Z x)) ` space M then (\<lambda>x. (X x, Y x, Z x)) -` {(X x, y)} \<inter> space M else {})" by auto
1.1000 -    hence "?dXYZ {(X x, y)} = (if y \<in> (\<lambda>x. (Y x, Z x)) ` space M then ?dXYZ {(X x, y)} else 0)"
1.1001 -      unfolding distribution_def by auto }
1.1002 -  note split_measure = this
1.1003 -
1.1004 -  have sets: "Y ` space M \<times> Z ` space M \<inter> (\<lambda>x. (Y x, Z x)) ` space M = (\<lambda>x. (Y x, Z x)) ` space M" by auto
1.1005 -
1.1006 -  have cong: "\<And>A B C D. \<lbrakk> A = C ; B = D \<rbrakk> \<Longrightarrow> A + B = C + D" by auto
1.1007 -
1.1008 -  { fix A f have "setsum f A = setsum (\<lambda>(x, y). f (y, x)) ((\<lambda>(x, y). (y, x)) ` A)"
1.1009 -    using setsum_reindex[OF inj_on_swap, of "\<lambda>(x, y). f (y, x)" A] by (simp add: split_twice) }
1.1010 -  note setsum_reindex_swap = this
1.1011 -
1.1012 -  { fix A B f assume *: "finite A" "\<forall>x\<in>A. finite (B x)"
1.1013 -    have "(\<Sum>x\<in>Sigma A B. f x) = (\<Sum>x\<in>A. setsum (\<lambda>y. f (x, y)) (B x))"
1.1014 -      unfolding setsum_Sigma[OF *] by simp }
1.1015 -  note setsum_Sigma = this
1.1016 +  have "- (\<Sum>(x, y, z) \<in> ?M. ?dXYZ {(x, y, z)} *
1.1017 +    log b (?dXYZ {(x, y, z)} / (?dXZ {(x, z)} * ?dYZ {(y,z)} / ?dZ {z})))
1.1018 +    \<le> log b (\<Sum>(x, y, z) \<in> ?M. ?dXZ {(x, z)} * ?dYZ {(y,z)} / ?dZ {z})"
1.1019 +    unfolding split_beta
1.1020 +  proof (rule log_setsum_divide)
1.1021 +    show "?M \<noteq> {}" using not_empty by simp
1.1022 +    show "1 < b" using b_gt_1 .
1.1023
1.1024 -  { fix x
1.1025 -    have "(\<Sum>z\<in>Z ` space M. ?dXZ {(X x, z)}) = (\<Sum>yz\<in>(\<lambda>x. (Y x, Z x)) ` space M. ?dXYZ {(X x, yz)})"
1.1026 -      apply (subst setsum_reindex_swap)
1.1027 -      apply (simp add: image_image distribution_def)
1.1028 -      unfolding image_pair_eq_Sigma
1.1029 -      apply (subst setsum_Sigma)
1.1030 -      using finite_space apply simp_all
1.1031 -      apply (rule setsum_cong[OF refl])
1.1033 -      by (auto simp add: disjoint_family_on_def sets_eq_Pow intro!: arg_cong[where f=prob]) }
1.1034 +    fix x assume "x \<in> ?M"
1.1035 +    show "0 \<le> ?dXYZ {(fst x, fst (snd x), snd (snd x))}" using positive_distribution .
1.1036 +    show "0 \<le> ?dXZ {(fst x, snd (snd x))} * ?dYZ {(fst (snd x), snd (snd x))} / ?dZ {snd (snd x)}"
1.1037 +      by (auto intro!: mult_nonneg_nonneg positive_distribution simp: zero_le_divide_iff)
1.1038
1.1039 -  thus "(\<Sum>(x, y, z)\<in>X ` space M \<times> Y ` space M \<times> Z ` space M.
1.1040 -      ?dXYZ {(x, y, z)} * log b (?dXYZ {(x, y, z)} / (?dX {x} * ?dYZ {(y, z)}))) -
1.1041 -    (\<Sum>(x, y)\<in>X ` space M \<times> Z ` space M.
1.1042 -      ?dXZ {(x, y)} * log b (?dXZ {(x, y)} / (?dX {x} * ?dZ {y}))) =
1.1043 -  - (\<Sum> (x, z) \<in> (X ` space M \<times> Z ` space M).
1.1044 -      ?dXZ {(x,z)} * log b (?dXZ {(x,z)} / ?dZ {z})) +
1.1045 -    (\<Sum> (x, y, z) \<in> (X ` space M \<times> (\<lambda>x. (Y x, Z x)) ` space M).
1.1046 -      ?dXYZ {(x, y, z)} * log b (?dXYZ {(x, y, z)} / ?dYZ {(y, z)}))"
1.1047 -    using finite_space
1.1048 -    apply (auto simp: setsum_cartesian_product[symmetric] setsum_negf[symmetric]
1.1050 -      intro!: setsum_cong[OF refl])
1.1051 -    apply (subst split_measure)
1.1052 -    apply (simp add: If_mult_distr setsum_cases sets distribution_log_split[OF assms, of X])
1.1054 -    by (simp add: setsum_subtractf setsum_negf field_simps setsum_right_distrib[symmetric] sets_eq_Pow)
1.1055 +    assume *: "0 < ?dXYZ {(fst x, fst (snd x), snd (snd x))}"
1.1056 +    thus "0 < ?dXZ {(fst x, snd (snd x))} * ?dYZ {(fst (snd x), snd (snd x))} / ?dZ {snd (snd x)}"
1.1057 +      by (auto intro!: divide_pos_pos mult_pos_pos
1.1058 +           intro: distribution_order(6) distribution_mono_gt_0)
1.1059 +  qed (simp_all add: setsum_cartesian_product' sum_over_space_distrib setsum_distribution finite_space)
1.1060 +  also have "(\<Sum>(x, y, z) \<in> ?M. ?dXZ {(x, z)} * ?dYZ {(y,z)} / ?dZ {z}) = (\<Sum>z\<in>Z`space M. ?dZ {z})"
1.1061 +    apply (simp add: setsum_cartesian_product')
1.1062 +    apply (subst setsum_commute)
1.1063 +    apply (subst (2) setsum_commute)
1.1064 +    by (auto simp: setsum_divide_distrib[symmetric] setsum_product[symmetric] setsum_distribution
1.1065 +          intro!: setsum_cong)
1.1066 +  finally show ?thesis
1.1067 +    unfolding conditional_mutual_information_eq sum_over_space_distrib by simp
1.1068  qed
1.1069
1.1070 +
1.1071  definition (in prob_space)
1.1072    "conditional_entropy b S T X Y = conditional_mutual_information b S S T X X Y"
1.1073
1.1074 -abbreviation (in finite_prob_space)
1.1075 -  finite_conditional_entropy ("\<H>\<^bsub>_\<^esub>'(_ | _')") where
1.1076 -  "\<H>\<^bsub>b\<^esub>(X | Y) \<equiv> conditional_entropy b
1.1077 +abbreviation (in finite_information_space)
1.1078 +  finite_conditional_entropy ("\<H>'(_ | _')") where
1.1079 +  "\<H>(X | Y) \<equiv> conditional_entropy b
1.1080      \<lparr> space = X`space M, sets = Pow (X`space M) \<rparr>
1.1081      \<lparr> space = Y`space M, sets = Pow (Y`space M) \<rparr> X Y"
1.1082
1.1083 -abbreviation (in finite_prob_space)
1.1084 -  finite_conditional_entropy_2 ("\<H>'(_ | _')") where
1.1085 -  "\<H>(X | Y) \<equiv> \<H>\<^bsub>2\<^esub>(X | Y)"
1.1086 +lemma (in finite_information_space) conditional_entropy_positive:
1.1087 +  "0 \<le> \<H>(X | Y)" unfolding conditional_entropy_def using conditional_mutual_information_positive .
1.1088
1.1089 -lemma (in finite_prob_space) finite_conditional_entropy_reduce:
1.1090 -  assumes "1 < b"
1.1091 -  shows "\<H>\<^bsub>b\<^esub>(X | Z) =
1.1092 +lemma (in finite_information_space) conditional_entropy_eq:
1.1093 +  "\<H>(X | Z) =
1.1094       - (\<Sum>(x, z)\<in>X ` space M \<times> Z ` space M.
1.1095           joint_distribution X Z {(x, z)} *
1.1096           log b (joint_distribution X Z {(x, z)} / distribution Z {z}))"
1.1097  proof -
1.1098    have *: "\<And>x y z. (\<lambda>x. (X x, X x, Z x)) -` {(x, y, z)} = (if x = y then (\<lambda>x. (X x, Z x)) -` {(x, z)} else {})" by auto
1.1099    show ?thesis
1.1100 -    unfolding finite_conditional_mutual_information_reduce[OF assms]
1.1101 -      conditional_entropy_def joint_distribution_def distribution_def *
1.1102 +    unfolding conditional_mutual_information_eq_sum
1.1103 +      conditional_entropy_def distribution_def *
1.1104      by (auto intro!: setsum_0')
1.1105  qed
1.1106
1.1107 -lemma (in finite_prob_space) finite_mutual_information_eq_entropy_conditional_entropy:
1.1108 -  assumes "1 < b" shows "\<I>\<^bsub>b\<^esub>(X ; Z) = \<H>\<^bsub>b\<^esub>(X) - \<H>\<^bsub>b\<^esub>(X | Z)" (is "mutual_information b ?X ?Z X Z = _")
1.1109 -  unfolding finite_mutual_information_reduce
1.1110 -    finite_entropy_reduce[OF assms]
1.1111 -    finite_conditional_entropy_reduce[OF assms]
1.1113 +lemma (in finite_information_space) mutual_information_eq_entropy_conditional_entropy:
1.1114 +  "\<I>(X ; Z) = \<H>(X) - \<H>(X | Z)"
1.1115 +  unfolding mutual_information_eq entropy_eq conditional_entropy_eq
1.1116    using finite_space
1.1118 -      setsum_Sigma[symmetric] distribution_log_split[OF assms] setsum_negf[symmetric]
1.1119 -    intro!: setsum_cong[OF refl])
1.1120 -  apply (simp add: setsum_negf setsum_left_distrib[symmetric])
1.1121 -proof (rule disjI2)
1.1122 -  let ?dX = "distribution X"
1.1123 -  and ?dXZ = "distribution (\<lambda>x. (X x, Z x))"
1.1126
1.1127 -  fix x assume "x \<in> space M"
1.1128 -  have "\<And>z. (\<lambda>x. (X x, Z x)) -` {(X x, z)} \<inter> space M = (X -` {X x} \<inter> space M) \<inter> (Z -` {z} \<inter> space M)" by auto
1.1129 -  thus "(\<Sum>z\<in>Z ` space M. distribution (\<lambda>x. (X x, Z x)) {(X x, z)}) = distribution X {X x}"
1.1130 -    unfolding distribution_def
1.1131 -    apply (subst prob_real_sum_image_fn[where e="X -` {X x} \<inter> space M" and s = "Z`space M" and f="\<lambda>z. Z -` {z} \<inter> space M"])
1.1132 -    using finite_space sets_eq_Pow by auto
1.1133 +lemma (in finite_information_space) conditional_entropy_less_eq_entropy:
1.1134 +  "\<H>(X | Z) \<le> \<H>(X)"
1.1135 +proof -
1.1136 +  have "\<I>(X ; Z) = \<H>(X) - \<H>(X | Z)" using mutual_information_eq_entropy_conditional_entropy .
1.1137 +  with mutual_information_positive[of X Z] entropy_positive[of X]
1.1138 +  show ?thesis by auto
1.1139  qed
1.1140
1.1141  (* -------------Entropy of a RV with a certain event is zero---------------- *)
1.1142
1.1143 -lemma (in finite_prob_space) finite_entropy_certainty_eq_0:
1.1144 -  assumes "x \<in> X ` space M" and "distribution X {x} = 1" and "b > 1"
1.1145 -  shows "\<H>\<^bsub>b\<^esub>(X) = 0"
1.1146 +lemma (in finite_information_space) finite_entropy_certainty_eq_0:
1.1147 +  assumes "x \<in> X ` space M" and "distribution X {x} = 1"
1.1148 +  shows "\<H>(X) = 0"
1.1149  proof -
1.1150    interpret X: finite_prob_space "\<lparr> space = X ` space M,
1.1151      sets = Pow (X ` space M),
1.1152 -    measure = distribution X\<rparr>" by (rule finite_prob_space)
1.1153 +    measure = distribution X\<rparr>" by (rule finite_prob_space_of_images)
1.1154
1.1155    have "distribution X (X ` space M - {x}) = distribution X (X ` space M) - distribution X {x}"
1.1156      using X.measure_compl[of "{x}"] assms by auto
1.1157 @@ -694,34 +606,18 @@
1.1158
1.1159    have y: "\<And>y. (if x = y then 1 else 0) * log b (if x = y then 1 else 0) = 0" by simp
1.1160
1.1161 -  show ?thesis
1.1162 -    unfolding finite_entropy_reduce[OF `b > 1`] by (auto simp: y fi)
1.1163 +  show ?thesis unfolding entropy_eq by (auto simp: y fi)
1.1164  qed
1.1165  (* --------------- upper bound on entropy for a rv ------------------------- *)
1.1166
1.1167 -lemma log_setsum:
1.1168 -  assumes "finite s" "s \<noteq> {}"
1.1169 -  assumes "b > 1"
1.1170 -  assumes "(\<Sum> i \<in> s. a i) = 1"
1.1171 -  assumes "\<And> i. i \<in> s \<Longrightarrow> a i \<ge> 0"
1.1172 -  assumes "\<And> i. i \<in> s \<Longrightarrow> y i \<in> {0 <..}"
1.1173 -  shows "log b (\<Sum> i \<in> s. a i * y i) \<ge> (\<Sum> i \<in> s. a i * log b (y i))"
1.1174 -proof -
1.1175 -  have "convex_on {0 <..} (\<lambda> x. - log b x)"
1.1176 -    by (rule minus_log_convex[OF `b > 1`])
1.1177 -  hence "- log b (\<Sum> i \<in> s. a i * y i) \<le> (\<Sum> i \<in> s. a i * - log b (y i))"
1.1178 -    using convex_on_setsum[of _ _ "\<lambda> x. - log b x"] assms pos_is_convex by fastsimp
1.1179 -  thus ?thesis by (auto simp add:setsum_negf le_imp_neg_le)
1.1180 -qed
1.1181 -
1.1182 -lemma (in finite_prob_space) finite_entropy_le_card:
1.1183 -  assumes "1 < b"
1.1184 -  shows "\<H>\<^bsub>b\<^esub>(X) \<le> log b (real (card (X ` space M \<inter> {x . distribution X {x} \<noteq> 0})))"
1.1185 +lemma (in finite_information_space) finite_entropy_le_card:
1.1186 +  "\<H>(X) \<le> log b (real (card (X ` space M \<inter> {x . distribution X {x} \<noteq> 0})))"
1.1187  proof -
1.1188    interpret X: finite_prob_space "\<lparr>space = X ` space M,
1.1189                                      sets = Pow (X ` space M),
1.1190                                   measure = distribution X\<rparr>"
1.1191 -    using finite_prob_space by auto
1.1192 +    using finite_prob_space_of_images by auto
1.1193 +
1.1194    have triv: "\<And> x. (if distribution X {x} \<noteq> 0 then distribution X {x} else 0) = distribution X {x}"
1.1195      by auto
1.1196    hence sum1: "(\<Sum> x \<in> X ` space M \<inter> {y. distribution X {y} \<noteq> 0}. distribution X {x}) = 1"
1.1197 @@ -753,7 +649,7 @@
1.1198      also have "\<dots> = (if distribution X {x} \<noteq> 0
1.1199                      then distribution X {x} * log b (inverse (distribution X {x}))
1.1200                      else 0)"
1.1201 -      using log_inverse `1 < b` X.positive[of "{x}"] asm by auto
1.1202 +      using log_inverse b_gt_1 X.positive[of "{x}"] asm by auto
1.1203      finally have "- distribution X {x} * log b (distribution X {x})
1.1204                   = (if distribution X {x} \<noteq> 0
1.1205                      then distribution X {x} * log b (inverse (distribution X {x}))
1.1206 @@ -769,7 +665,7 @@
1.1207      unfolding setsum_restrict_set[OF finite_imageI[OF finite_space, of X]] by auto
1.1208    also have "\<dots> \<le> log b (\<Sum> x \<in> X ` space M \<inter> {y. distribution X {y} \<noteq> 0}.
1.1209                            distribution X {x} * (inverse (distribution X {x})))"
1.1210 -    apply (subst log_setsum[OF _ _ `b > 1` sum1,
1.1211 +    apply (subst log_setsum[OF _ _ b_gt_1 sum1,
1.1212       unfolded greaterThan_iff, OF _ _ _]) using pos sets_eq_Pow
1.1213        X.finite_space assms X.positive not_empty by auto
1.1214    also have "\<dots> = log b (\<Sum> x \<in> X ` space M \<inter> {y. distribution X {y} \<noteq> 0}. 1)"
1.1215 @@ -778,7 +674,7 @@
1.1216      by auto
1.1217    finally have "- (\<Sum>x\<in>X ` space M. distribution X {x} * log b (distribution X {x}))
1.1218                 \<le> log b (real_of_nat (card (X ` space M \<inter> {y. distribution X {y} \<noteq> 0})))" by simp
1.1219 -  thus ?thesis unfolding finite_entropy_reduce[OF assms] real_eq_of_nat by auto
1.1220 +  thus ?thesis unfolding entropy_eq real_eq_of_nat by auto
1.1221  qed
1.1222
1.1223  (* --------------- entropy is maximal for a uniform rv --------------------- *)
1.1224 @@ -808,34 +704,31 @@
1.1225      by (auto simp:field_simps)
1.1226  qed
1.1227
1.1228 -lemma (in finite_prob_space) finite_entropy_uniform_max:
1.1229 -  assumes "b > 1"
1.1230 +lemma (in finite_information_space) finite_entropy_uniform_max:
1.1231    assumes "\<And>x y. \<lbrakk> x \<in> X ` space M ; y \<in> X ` space M \<rbrakk> \<Longrightarrow> distribution X {x} = distribution X {y}"
1.1232 -  shows "\<H>\<^bsub>b\<^esub>(X) = log b (real (card (X ` space M)))"
1.1233 +  shows "\<H>(X) = log b (real (card (X ` space M)))"
1.1234  proof -
1.1235    interpret X: finite_prob_space "\<lparr>space = X ` space M,
1.1236                                      sets = Pow (X ` space M),
1.1237                                   measure = distribution X\<rparr>"
1.1238 -    using finite_prob_space by auto
1.1239 +    using finite_prob_space_of_images by auto
1.1240 +
1.1241    { fix x assume xasm: "x \<in> X ` space M"
1.1242      hence card_gt0: "real (card (X ` space M)) > 0"
1.1243        using card_gt_0_iff X.finite_space by auto
1.1244      from xasm have "\<And> y. y \<in> X ` space M \<Longrightarrow> distribution X {y} = distribution X {x}"
1.1245        using assms by blast
1.1246      hence "- (\<Sum>x\<in>X ` space M. distribution X {x} * log b (distribution X {x}))
1.1247 -         = - (\<Sum> y \<in> X ` space M. distribution X {x} * log b (distribution X {x}))"
1.1248 -      by auto
1.1249 -    also have "\<dots> = - real_of_nat (card (X ` space M)) * distribution X {x} * log b (distribution X {x})"
1.1250 -      by auto
1.1251 +         = - real (card (X ` space M)) * distribution X {x} * log b (distribution X {x})"
1.1252 +      unfolding real_eq_of_nat by auto
1.1253      also have "\<dots> = - real (card (X ` space M)) * (1 / real (card (X ` space M))) * log b (1 / real (card (X ` space M)))"
1.1254 -      unfolding real_eq_of_nat[symmetric]
1.1255 -      by (auto simp: X.uniform_prob[simplified, OF xasm assms(2)])
1.1256 +      by (auto simp: X.uniform_prob[simplified, OF xasm assms])
1.1257      also have "\<dots> = log b (real (card (X ` space M)))"
1.1258        unfolding inverse_eq_divide[symmetric]
1.1259 -      using card_gt0 log_inverse `b > 1`
1.1260 +      using card_gt0 log_inverse b_gt_1
1.1261        by (auto simp add:field_simps card_gt0)
1.1262      finally have ?thesis
1.1263 -      unfolding finite_entropy_reduce[OF `b > 1`] by auto }
1.1264 +      unfolding entropy_eq by auto }
1.1265    moreover
1.1266    { assume "X ` space M = {}"
1.1267      hence "distribution X (X ` space M) = 0"
1.1268 @@ -844,4 +737,199 @@
1.1269    ultimately show ?thesis by auto
1.1270  qed
1.1271
1.1272 +definition "subvimage A f g \<longleftrightarrow> (\<forall>x \<in> A. f -` {f x} \<inter> A \<subseteq> g -` {g x} \<inter> A)"
1.1273 +
1.1274 +lemma subvimageI:
1.1275 +  assumes "\<And>x y. \<lbrakk> x \<in> A ; y \<in> A ; f x = f y \<rbrakk> \<Longrightarrow> g x = g y"
1.1276 +  shows "subvimage A f g"
1.1277 +  using assms unfolding subvimage_def by blast
1.1278 +
1.1279 +lemma subvimageE[consumes 1]:
1.1280 +  assumes "subvimage A f g"
1.1281 +  obtains "\<And>x y. \<lbrakk> x \<in> A ; y \<in> A ; f x = f y \<rbrakk> \<Longrightarrow> g x = g y"
1.1282 +  using assms unfolding subvimage_def by blast
1.1283 +
1.1284 +lemma subvimageD:
1.1285 +  "\<lbrakk> subvimage A f g ; x \<in> A ; y \<in> A ; f x = f y \<rbrakk> \<Longrightarrow> g x = g y"
1.1286 +  using assms unfolding subvimage_def by blast
1.1287 +
1.1288 +lemma subvimage_subset:
1.1289 +  "\<lbrakk> subvimage B f g ; A \<subseteq> B \<rbrakk> \<Longrightarrow> subvimage A f g"
1.1290 +  unfolding subvimage_def by auto
1.1291 +
1.1292 +lemma subvimage_idem[intro]: "subvimage A g g"
1.1293 +  by (safe intro!: subvimageI)
1.1294 +
1.1295 +lemma subvimage_comp_finer[intro]:
1.1296 +  assumes svi: "subvimage A g h"
1.1297 +  shows "subvimage A g (f \<circ> h)"
1.1298 +proof (rule subvimageI, simp)
1.1299 +  fix x y assume "x \<in> A" "y \<in> A" "g x = g y"
1.1300 +  from svi[THEN subvimageD, OF this]
1.1301 +  show "f (h x) = f (h y)" by simp
1.1302 +qed
1.1303 +
1.1304 +lemma subvimage_comp_gran:
1.1305 +  assumes svi: "subvimage A g h"
1.1306 +  assumes inj: "inj_on f (g ` A)"
1.1307 +  shows "subvimage A (f \<circ> g) h"
1.1308 +  by (rule subvimageI) (auto intro!: subvimageD[OF svi] simp: inj_on_iff[OF inj])
1.1309 +
1.1310 +lemma subvimage_comp:
1.1311 +  assumes svi: "subvimage (f ` A) g h"
1.1312 +  shows "subvimage A (g \<circ> f) (h \<circ> f)"
1.1313 +  by (rule subvimageI) (auto intro!: svi[THEN subvimageD])
1.1314 +
1.1315 +lemma subvimage_trans:
1.1316 +  assumes fg: "subvimage A f g"
1.1317 +  assumes gh: "subvimage A g h"
1.1318 +  shows "subvimage A f h"
1.1319 +  by (rule subvimageI) (auto intro!: fg[THEN subvimageD] gh[THEN subvimageD])
1.1320 +
1.1321 +lemma subvimage_translator:
1.1322 +  assumes svi: "subvimage A f g"
1.1323 +  shows "\<exists>h. \<forall>x \<in> A. h (f x)  = g x"
1.1324 +proof (safe intro!: exI[of _ "\<lambda>x. (THE z. z \<in> (g ` (f -` {x} \<inter> A)))"])
1.1325 +  fix x assume "x \<in> A"
1.1326 +  show "(THE x'. x' \<in> (g ` (f -` {f x} \<inter> A))) = g x"
1.1327 +    by (rule theI2[of _ "g x"])
1.1328 +      (insert `x \<in> A`, auto intro!: svi[THEN subvimageD])
1.1329 +qed
1.1330 +
1.1331 +lemma subvimage_translator_image:
1.1332 +  assumes svi: "subvimage A f g"
1.1333 +  shows "\<exists>h. h ` f ` A = g ` A"
1.1334 +proof -
1.1335 +  from subvimage_translator[OF svi]
1.1336 +  obtain h where "\<And>x. x \<in> A \<Longrightarrow> h (f x) = g x" by auto
1.1337 +  thus ?thesis
1.1338 +    by (auto intro!: exI[of _ h]
1.1339 +      simp: image_compose[symmetric] comp_def cong: image_cong)
1.1340 +qed
1.1341 +
1.1342 +lemma subvimage_finite:
1.1343 +  assumes svi: "subvimage A f g" and fin: "finite (f`A)"
1.1344 +  shows "finite (g`A)"
1.1345 +proof -
1.1346 +  from subvimage_translator_image[OF svi]
1.1347 +  obtain h where "g`A = h`f`A" by fastsimp
1.1348 +  with fin show "finite (g`A)" by simp
1.1349 +qed
1.1350 +
1.1351 +lemma subvimage_disj:
1.1352 +  assumes svi: "subvimage A f g"
1.1353 +  shows "f -` {x} \<inter> A \<subseteq> g -` {y} \<inter> A \<or>
1.1354 +      f -` {x} \<inter> g -` {y} \<inter> A = {}" (is "?sub \<or> ?dist")
1.1355 +proof (rule disjCI)
1.1356 +  assume "\<not> ?dist"
1.1357 +  then obtain z where "z \<in> A" and "x = f z" and "y = g z" by auto
1.1358 +  thus "?sub" using svi unfolding subvimage_def by auto
1.1359 +qed
1.1360 +
1.1361 +lemma setsum_image_split:
1.1362 +  assumes svi: "subvimage A f g" and fin: "finite (f ` A)"
1.1363 +  shows "(\<Sum>x\<in>f`A. h x) = (\<Sum>y\<in>g`A. \<Sum>x\<in>f`(g -` {y} \<inter> A). h x)"
1.1364 +    (is "?lhs = ?rhs")
1.1365 +proof -
1.1366 +  have "f ` A =
1.1367 +      snd ` (SIGMA x : g ` A. f ` (g -` {x} \<inter> A))"
1.1368 +      (is "_ = snd ` ?SIGMA")
1.1369 +    unfolding image_split_eq_Sigma[symmetric]
1.1370 +    by (simp add: image_compose[symmetric] comp_def)
1.1371 +  moreover
1.1372 +  have snd_inj: "inj_on snd ?SIGMA"
1.1373 +    unfolding image_split_eq_Sigma[symmetric]
1.1374 +    by (auto intro!: inj_onI subvimageD[OF svi])
1.1375 +  ultimately
1.1376 +  have "(\<Sum>x\<in>f`A. h x) = (\<Sum>(x,y)\<in>?SIGMA. h y)"
1.1377 +    by (auto simp: setsum_reindex intro: setsum_cong)
1.1378 +  also have "... = ?rhs"
1.1379 +    using subvimage_finite[OF svi fin] fin
1.1380 +    apply (subst setsum_Sigma[symmetric])
1.1381 +    by (auto intro!: finite_subset[of _ "f`A"])
1.1382 +  finally show ?thesis .
1.1383 +qed
1.1384 +
1.1385 +lemma (in finite_information_space) entropy_partition:
1.1386 +  assumes svi: "subvimage (space M) X P"
1.1387 +  shows "\<H>(X) = \<H>(P) + \<H>(X|P)"
1.1388 +proof -
1.1389 +  have "(\<Sum>x\<in>X ` space M. distribution X {x} * log b (distribution X {x})) =
1.1390 +    (\<Sum>y\<in>P `space M. \<Sum>x\<in>X ` space M.
1.1391 +    joint_distribution X P {(x, y)} * log b (joint_distribution X P {(x, y)}))"
1.1392 +  proof (subst setsum_image_split[OF svi],
1.1393 +      safe intro!: finite_imageI finite_space setsum_mono_zero_cong_left imageI)
1.1394 +    fix p x assume in_space: "p \<in> space M" "x \<in> space M"
1.1395 +    assume "joint_distribution X P {(X x, P p)} * log b (joint_distribution X P {(X x, P p)}) \<noteq> 0"
1.1396 +    hence "(\<lambda>x. (X x, P x)) -` {(X x, P p)} \<inter> space M \<noteq> {}" by (auto simp: distribution_def)
1.1397 +    with svi[unfolded subvimage_def, rule_format, OF `x \<in> space M`]
1.1398 +    show "x \<in> P -` {P p}" by auto
1.1399 +  next
1.1400 +    fix p x assume in_space: "p \<in> space M" "x \<in> space M"
1.1401 +    assume "P x = P p"
1.1402 +    from this[symmetric] svi[unfolded subvimage_def, rule_format, OF `x \<in> space M`]
1.1403 +    have "X -` {X x} \<inter> space M \<subseteq> P -` {P p} \<inter> space M"
1.1404 +      by auto
1.1405 +    hence "(\<lambda>x. (X x, P x)) -` {(X x, P p)} \<inter> space M = X -` {X x} \<inter> space M"
1.1406 +      by auto
1.1407 +    thus "distribution X {X x} * log b (distribution X {X x}) =
1.1408 +          joint_distribution X P {(X x, P p)} *
1.1409 +          log b (joint_distribution X P {(X x, P p)})"
1.1410 +      by (auto simp: distribution_def)
1.1411 +  qed
1.1412 +  thus ?thesis
1.1413 +  unfolding entropy_eq conditional_entropy_eq
1.1414 +    by (simp add: setsum_cartesian_product' setsum_subtractf setsum_distribution
1.1415 +      setsum_left_distrib[symmetric] setsum_commute[where B="P`space M"])
1.1416 +qed
1.1417 +
1.1418 +corollary (in finite_information_space) entropy_data_processing:
1.1419 +  "\<H>(f \<circ> X) \<le> \<H>(X)"
1.1420 +  by (subst (2) entropy_partition[of _ "f \<circ> X"]) (auto intro: conditional_entropy_positive)
1.1421 +
1.1422 +lemma (in prob_space) distribution_cong:
1.1423 +  assumes "\<And>x. x \<in> space M \<Longrightarrow> X x = Y x"
1.1424 +  shows "distribution X = distribution Y"
1.1425 +  unfolding distribution_def expand_fun_eq
1.1426 +  using assms by (auto intro!: arg_cong[where f=prob])
1.1427 +
1.1428 +lemma (in prob_space) joint_distribution_cong:
1.1429 +  assumes "\<And>x. x \<in> space M \<Longrightarrow> X x = X' x"
1.1430 +  assumes "\<And>x. x \<in> space M \<Longrightarrow> Y x = Y' x"
1.1431 +  shows "joint_distribution X Y = joint_distribution X' Y'"
1.1432 +  unfolding distribution_def expand_fun_eq
1.1433 +  using assms by (auto intro!: arg_cong[where f=prob])
1.1434 +
1.1435 +lemma image_cong:
1.1436 +  "\<lbrakk> \<And>x. x \<in> S \<Longrightarrow> X x = X' x \<rbrakk> \<Longrightarrow> X ` S = X' ` S"
1.1437 +  by (auto intro!: image_eqI)
1.1438 +
1.1439 +lemma (in finite_information_space) mutual_information_cong:
1.1440 +  assumes X: "\<And>x. x \<in> space M \<Longrightarrow> X x = X' x"
1.1441 +  assumes Y: "\<And>x. x \<in> space M \<Longrightarrow> Y x = Y' x"
1.1442 +  shows "\<I>(X ; Y) = \<I>(X' ; Y')"
1.1443 +proof -
1.1444 +  have "X ` space M = X' ` space M" using X by (rule image_cong)
1.1445 +  moreover have "Y ` space M = Y' ` space M" using Y by (rule image_cong)
1.1446 +  ultimately show ?thesis
1.1447 +  unfolding mutual_information_eq
1.1448 +    using
1.1449 +      assms[THEN distribution_cong]
1.1450 +      joint_distribution_cong[OF assms]
1.1451 +    by (auto intro!: setsum_cong)
1.1452 +qed
1.1453 +
1.1454 +corollary (in finite_information_space) entropy_of_inj:
1.1455 +  assumes "inj_on f (X`space M)"
1.1456 +  shows "\<H>(f \<circ> X) = \<H>(X)"
1.1457 +proof (rule antisym)
1.1458 +  show "\<H>(f \<circ> X) \<le> \<H>(X)" using entropy_data_processing .
1.1459 +next
1.1460 +  have "\<H>(X) = \<H>(the_inv_into (X`space M) f \<circ> (f \<circ> X))"
1.1461 +    by (auto intro!: mutual_information_cong simp: entropy_def the_inv_into_f_f[OF assms])
1.1462 +  also have "... \<le> \<H>(f \<circ> X)"
1.1463 +    using entropy_data_processing .
1.1464 +  finally show "\<H>(X) \<le> \<H>(f \<circ> X)" .
1.1465 +qed
1.1466 +
1.1467  end
```
```     2.1 --- a/src/HOL/Probability/Lebesgue.thy	Mon May 03 14:35:10 2010 +0200
2.2 +++ b/src/HOL/Probability/Lebesgue.thy	Mon May 03 14:35:10 2010 +0200
2.3 @@ -25,6 +25,21 @@
2.4    shows "nonneg (neg_part f)"
2.5    unfolding nonneg_def neg_part_def min_def by auto
2.6
2.7 +lemma pos_neg_part_abs:
2.8 +  fixes f :: "'a \<Rightarrow> real"
2.9 +  shows "pos_part f x + neg_part f x = \<bar>f x\<bar>"
2.10 +unfolding real_abs_def pos_part_def neg_part_def by auto
2.11 +
2.12 +lemma pos_part_abs:
2.13 +  fixes f :: "'a \<Rightarrow> real"
2.14 +  shows "pos_part (\<lambda> x. \<bar>f x\<bar>) y = \<bar>f y\<bar>"
2.15 +unfolding pos_part_def real_abs_def by auto
2.16 +
2.17 +lemma neg_part_abs:
2.18 +  fixes f :: "'a \<Rightarrow> real"
2.19 +  shows "neg_part (\<lambda> x. \<bar>f x\<bar>) y = 0"
2.20 +unfolding neg_part_def real_abs_def by auto
2.21 +
2.22  lemma (in measure_space)
2.23    assumes "f \<in> borel_measurable M"
2.24    shows pos_part_borel_measurable: "pos_part f \<in> borel_measurable M"
2.25 @@ -1273,6 +1288,22 @@
2.26    thus "?int S" and "?I S" by auto
2.27  qed
2.28
2.29 +lemma (in measure_space) integrable_abs:
2.30 +  assumes "integrable f"
2.31 +  shows "integrable (\<lambda> x. \<bar>f x\<bar>)"
2.32 +using assms
2.33 +proof -
2.34 +  from assms obtain p q where pq: "p \<in> nnfis (pos_part f)" "q \<in> nnfis (neg_part f)"
2.35 +    unfolding integrable_def by auto
2.36 +  hence "p + q \<in> nnfis (\<lambda> x. pos_part f x + neg_part f x)"
2.37 +    using nnfis_add by auto
2.38 +  hence "p + q \<in> nnfis (\<lambda> x. \<bar>f x\<bar>)" using pos_neg_part_abs[of f] by simp
2.39 +  thus ?thesis unfolding integrable_def
2.40 +    using ext[OF pos_part_abs[of f], of "\<lambda> y. y"]
2.41 +      ext[OF neg_part_abs[of f], of "\<lambda> y. y"]
2.42 +    using nnfis_0 by auto
2.43 +qed
2.44 +
2.45  lemma markov_ineq:
2.46    assumes "integrable f" "0 < a" "integrable (\<lambda>x. \<bar>f x\<bar>^n)"
2.47    shows "measure M (f -` {a ..} \<inter> space M) \<le> integral (\<lambda>x. \<bar>f x\<bar>^n) / a^n"
2.48 @@ -1310,6 +1341,61 @@
2.49      by (auto intro!: mult_imp_le_div_pos[OF `0 < a ^ n`], simp add: real_mult_commute)
2.50  qed
2.51
2.52 +lemma (in measure_space) integral_0:
2.53 +  fixes f :: "'a \<Rightarrow> real"
2.54 +  assumes "integrable f" "integral f = 0" "nonneg f" and borel: "f \<in> borel_measurable M"
2.55 +  shows "measure M ({x. f x \<noteq> 0} \<inter> space M) = 0"
2.56 +proof -
2.57 +  have "{x. f x \<noteq> 0} = {x. \<bar>f x\<bar> > 0}" by auto
2.58 +  moreover
2.59 +  { fix y assume "y \<in> {x. \<bar> f x \<bar> > 0}"
2.60 +    hence "\<bar> f y \<bar> > 0" by auto
2.61 +    hence "\<exists> n. \<bar>f y\<bar> \<ge> inverse (real (Suc n))"
2.62 +      using ex_inverse_of_nat_Suc_less[of "\<bar>f y\<bar>"] less_imp_le unfolding real_of_nat_def by auto
2.63 +    hence "y \<in> (\<Union> n. {x. \<bar>f x\<bar> \<ge> inverse (real (Suc n))})"
2.64 +      by auto }
2.65 +  moreover
2.66 +  { fix y assume "y \<in> (\<Union> n. {x. \<bar>f x\<bar> \<ge> inverse (real (Suc n))})"
2.67 +    then obtain n where n: "y \<in> {x. \<bar>f x\<bar> \<ge> inverse (real (Suc n))}" by auto
2.68 +    hence "\<bar>f y\<bar> \<ge> inverse (real (Suc n))" by auto
2.69 +    hence "\<bar>f y\<bar> > 0"
2.70 +      using real_of_nat_Suc_gt_zero
2.71 +        positive_imp_inverse_positive[of "real_of_nat (Suc n)"] by fastsimp
2.72 +    hence "y \<in> {x. \<bar>f x\<bar> > 0}" by auto }
2.73 +  ultimately have fneq0_UN: "{x. f x \<noteq> 0} = (\<Union> n. {x. \<bar>f x\<bar> \<ge> inverse (real (Suc n))})"
2.74 +    by blast
2.75 +  { fix n
2.76 +    have int_one: "integrable (\<lambda> x. \<bar>f x\<bar> ^ 1)" using integrable_abs assms by auto
2.77 +    have "measure M (f -` {inverse (real (Suc n))..} \<inter> space M)
2.78 +           \<le> integral (\<lambda> x. \<bar>f x\<bar> ^ 1) / (inverse (real (Suc n)) ^ 1)"
2.79 +      using markov_ineq[OF `integrable f` _ int_one] real_of_nat_Suc_gt_zero by auto
2.80 +    hence le0: "measure M (f -` {inverse (real (Suc n))..} \<inter> space M) \<le> 0"
2.81 +      using assms unfolding nonneg_def by auto
2.82 +    have "{x. f x \<ge> inverse (real (Suc n))} \<inter> space M \<in> sets M"
2.83 +      apply (subst Int_commute) unfolding Int_def
2.84 +      using borel[unfolded borel_measurable_ge_iff] by simp
2.85 +    hence m0: "measure M ({x. f x \<ge> inverse (real (Suc n))} \<inter> space M) = 0 \<and>
2.86 +      {x. f x \<ge> inverse (real (Suc n))} \<inter> space M \<in> sets M"
2.87 +      using positive le0 unfolding atLeast_def by fastsimp }
2.88 +  moreover hence "range (\<lambda> n. {x. f x \<ge> inverse (real (Suc n))} \<inter> space M) \<subseteq> sets M"
2.89 +    by auto
2.90 +  moreover
2.91 +  { fix n
2.92 +    have "inverse (real (Suc n)) \<ge> inverse (real (Suc (Suc n)))"
2.93 +      using less_imp_inverse_less real_of_nat_Suc_gt_zero[of n] by fastsimp
2.94 +    hence "\<And> x. f x \<ge> inverse (real (Suc n)) \<Longrightarrow> f x \<ge> inverse (real (Suc (Suc n)))" by (rule order_trans)
2.95 +    hence "{x. f x \<ge> inverse (real (Suc n))} \<inter> space M
2.96 +         \<subseteq> {x. f x \<ge> inverse (real (Suc (Suc n)))} \<inter> space M" by auto }
2.97 +  ultimately have "(\<lambda> x. 0) ----> measure M (\<Union> n. {x. f x \<ge> inverse (real (Suc n))} \<inter> space M)"
2.98 +    using monotone_convergence[of "\<lambda> n. {x. f x \<ge> inverse (real (Suc n))} \<inter> space M"]
2.99 +    unfolding o_def by (simp del: of_nat_Suc)
2.100 +  hence "measure M (\<Union> n. {x. f x \<ge> inverse (real (Suc n))} \<inter> space M) = 0"
2.101 +    using LIMSEQ_const[of 0] LIMSEQ_unique by simp
2.102 +  hence "measure M ((\<Union> n. {x. \<bar>f x\<bar> \<ge> inverse (real (Suc n))}) \<inter> space M) = 0"
2.103 +    using assms unfolding nonneg_def by auto
2.104 +  thus "measure M ({x. f x \<noteq> 0} \<inter> space M) = 0" using fneq0_UN by simp
2.105 +qed
2.106 +
2.107  section "Lebesgue integration on countable spaces"
2.108
2.109  lemma nnfis_on_countable:
2.110 @@ -1551,10 +1637,6 @@
2.111
2.112  end
2.113
2.114 -locale finite_measure_space = measure_space +
2.115 -  assumes finite_space: "finite (space M)"
2.116 -  and sets_eq_Pow: "sets M = Pow (space M)"
2.117 -
2.118  lemma sigma_algebra_cong:
2.119    fixes M :: "('a, 'b) algebra_scheme" and M' :: "('a, 'c) algebra_scheme"
2.120    assumes *: "sigma_algebra M"
2.121 @@ -1610,7 +1692,7 @@
2.122  lemma (in finite_measure_space) RN_deriv_finite_singleton:
2.123    fixes v :: "'a set \<Rightarrow> real"
2.124    assumes ms_v: "measure_space (M\<lparr>measure := v\<rparr>)"
2.125 -  and eq_0: "\<And>x. measure M {x} = 0 \<Longrightarrow> v {x} = 0"
2.126 +  and eq_0: "\<And>x. \<lbrakk> x \<in> space M ; measure M {x} = 0 \<rbrakk> \<Longrightarrow> v {x} = 0"
2.127    and "x \<in> space M" and "measure M {x} \<noteq> 0"
2.128    shows "RN_deriv v x = v {x} / (measure M {x})" (is "_ = ?v x")
2.129    unfolding RN_deriv_def
2.130 @@ -1621,7 +1703,7 @@
2.131    fix a assume "a \<in> sets M"
2.132    hence "a \<subseteq> space M" and "finite a"
2.133      using sets_into_space finite_space by (auto intro: finite_subset)
2.134 -  have *: "\<And>x a. (if measure M {x} = 0 then 0 else v {x} * indicator_fn a x) =
2.135 +  have *: "\<And>x a. x \<in> space M \<Longrightarrow> (if measure M {x} = 0 then 0 else v {x} * indicator_fn a x) =
2.136      v {x} * indicator_fn a x" using eq_0 by auto
2.137
2.138    from measure_space.measure_real_sum_image[OF ms_v, of a]
```
```     3.1 --- a/src/HOL/Probability/Measure.thy	Mon May 03 14:35:10 2010 +0200
3.2 +++ b/src/HOL/Probability/Measure.thy	Mon May 03 14:35:10 2010 +0200
3.3 @@ -365,6 +365,18 @@
3.4      by arith
3.5  qed
3.6
3.7 +lemma (in measure_space) measure_mono:
3.8 +  assumes "a \<subseteq> b" "a \<in> sets M" "b \<in> sets M"
3.9 +  shows "measure M a \<le> measure M b"
3.10 +proof -
3.11 +  have "b = a \<union> (b - a)" using assms by auto
3.12 +  moreover have "{} = a \<inter> (b - a)" by auto
3.13 +  ultimately have "measure M b = measure M a + measure M (b - a)"
3.14 +    using measure_additive[of a "b - a"] local.Diff[of b a] assms by auto
3.15 +  moreover have "measure M (b - a) \<ge> 0" using positive assms by auto
3.16 +  ultimately show "measure M a \<le> measure M b" by auto
3.17 +qed
3.18 +
3.19  lemma disjoint_family_Suc:
3.20    assumes Suc: "!!n. A n \<subseteq> A (Suc n)"
3.21    shows "disjoint_family (\<lambda>i. A (Suc i) - A i)"
3.22 @@ -1045,4 +1057,12 @@
3.23    qed
3.24  qed
3.25
3.26 +locale finite_measure_space = measure_space +
3.27 +  assumes finite_space: "finite (space M)"
3.28 +  and sets_eq_Pow: "sets M = Pow (space M)"
3.29 +
3.30 +lemma (in finite_measure_space) sum_over_space: "(\<Sum>x\<in>space M. measure M {x}) = measure M (space M)"
3.31 +  using measure_finitely_additive''[of "space M" "\<lambda>i. {i}"]
3.32 +  by (simp add: sets_eq_Pow disjoint_family_on_def finite_space)
3.33 +
3.34  end
3.35 \ No newline at end of file
```
```     4.1 --- a/src/HOL/Probability/Probability_Space.thy	Mon May 03 14:35:10 2010 +0200
4.2 +++ b/src/HOL/Probability/Probability_Space.thy	Mon May 03 14:35:10 2010 +0200
4.3 @@ -21,22 +21,23 @@
4.4  definition
4.5    "distribution X = (\<lambda>s. prob ((X -` s) \<inter> (space M)))"
4.6
4.7 -definition
4.8 -  "probably e \<longleftrightarrow> e \<in> events \<and> prob e = 1"
4.9 +abbreviation
4.10 +  "joint_distribution X Y \<equiv> distribution (\<lambda>x. (X x, Y x))"
4.11
4.12 -definition
4.13 -  "possibly e \<longleftrightarrow> e \<in> events \<and> prob e \<noteq> 0"
4.14 +(*
4.15 +definition probably :: "('a \<Rightarrow> bool) \<Rightarrow> bool" (binder "\<forall>\<^sup>*" 10) where
4.16 +  "probably P \<longleftrightarrow> { x. P x } \<in> events \<and> prob { x. P x } = 1"
4.17 +definition possibly :: "('a \<Rightarrow> bool) \<Rightarrow> bool" (binder "\<exists>\<^sup>*" 10) where
4.18 +  "possibly P \<longleftrightarrow> { x. P x } \<in> events \<and> prob { x. P x } \<noteq> 0"
4.19 +*)
4.20
4.21  definition
4.22 -  "joint_distribution X Y \<equiv> (\<lambda>a. prob ((\<lambda>x. (X x, Y x)) -` a \<inter> space M))"
4.23 +  "conditional_expectation X M' \<equiv> SOME f. f \<in> measurable M' borel_space \<and>
4.24 +    (\<forall> g \<in> sets M'. measure_space.integral M' (\<lambda>x. f x * indicator_fn g x) =
4.25 +                    measure_space.integral M' (\<lambda>x. X x * indicator_fn g x))"
4.26
4.27  definition
4.28 -  "conditional_expectation X s \<equiv> THE f. random_variable borel_space f \<and>
4.29 -    (\<forall> g \<in> s. integral (\<lambda>x. f x * indicator_fn g x) =
4.30 -              integral (\<lambda>x. X x * indicator_fn g x))"
4.31 -
4.32 -definition
4.33 -  "conditional_prob e1 e2 \<equiv> conditional_expectation (indicator_fn e1) e2"
4.34 +  "conditional_prob E M' \<equiv> conditional_expectation (indicator_fn E) M'"
4.35
4.36  lemma positive': "positive M prob"
4.37    unfolding positive_def using positive empty_measure by blast
4.38 @@ -389,14 +390,61 @@
4.39
4.40  locale finite_prob_space = prob_space + finite_measure_space
4.41
4.42 -lemma (in finite_prob_space) finite_marginal_product_space_POW2:
4.43 +lemma finite_prob_space_eq:
4.44 +  "finite_prob_space M \<longleftrightarrow> finite_measure_space M \<and> measure M (space M) = 1"
4.45 +  unfolding finite_prob_space_def finite_measure_space_def prob_space_def prob_space_axioms_def
4.46 +  by auto
4.47 +
4.48 +lemma (in prob_space) not_empty: "space M \<noteq> {}"
4.49 +  using prob_space empty_measure by auto
4.50 +
4.51 +lemma (in finite_prob_space) sum_over_space_eq_1: "(\<Sum>x\<in>space M. measure M {x}) = 1"
4.52 +  using prob_space sum_over_space by simp
4.53 +
4.54 +lemma (in finite_prob_space) positive_distribution: "0 \<le> distribution X x"
4.55 +  unfolding distribution_def using positive sets_eq_Pow by simp
4.56 +
4.57 +lemma (in finite_prob_space) joint_distribution_restriction_fst:
4.58 +  "joint_distribution X Y A \<le> distribution X (fst ` A)"
4.59 +  unfolding distribution_def
4.60 +proof (safe intro!: measure_mono)
4.61 +  fix x assume "x \<in> space M" and *: "(X x, Y x) \<in> A"
4.62 +  show "x \<in> X -` fst ` A"
4.63 +    by (auto intro!: image_eqI[OF _ *])
4.65 +
4.66 +lemma (in finite_prob_space) joint_distribution_restriction_snd:
4.67 +  "joint_distribution X Y A \<le> distribution Y (snd ` A)"
4.68 +  unfolding distribution_def
4.69 +proof (safe intro!: measure_mono)
4.70 +  fix x assume "x \<in> space M" and *: "(X x, Y x) \<in> A"
4.71 +  show "x \<in> Y -` snd ` A"
4.72 +    by (auto intro!: image_eqI[OF _ *])
4.74 +
4.75 +lemma (in finite_prob_space) distribution_order:
4.76 +  shows "0 \<le> distribution X x'"
4.77 +  and "(distribution X x' \<noteq> 0) \<longleftrightarrow> (0 < distribution X x')"
4.78 +  and "r \<le> joint_distribution X Y {(x, y)} \<Longrightarrow> r \<le> distribution X {x}"
4.79 +  and "r \<le> joint_distribution X Y {(x, y)} \<Longrightarrow> r \<le> distribution Y {y}"
4.80 +  and "r < joint_distribution X Y {(x, y)} \<Longrightarrow> r < distribution X {x}"
4.81 +  and "r < joint_distribution X Y {(x, y)} \<Longrightarrow> r < distribution Y {y}"
4.82 +  and "distribution X {x} = 0 \<Longrightarrow> joint_distribution X Y {(x, y)} = 0"
4.83 +  and "distribution Y {y} = 0 \<Longrightarrow> joint_distribution X Y {(x, y)} = 0"
4.84 +  using positive_distribution[of X x']
4.85 +    positive_distribution[of "\<lambda>x. (X x, Y x)" "{(x, y)}"]
4.86 +    joint_distribution_restriction_fst[of X Y "{(x, y)}"]
4.87 +    joint_distribution_restriction_snd[of X Y "{(x, y)}"]
4.88 +  by auto
4.89 +
4.90 +lemma (in finite_prob_space) finite_product_measure_space:
4.91    assumes "finite s1" "finite s2"
4.92    shows "finite_measure_space \<lparr> space = s1 \<times> s2, sets = Pow (s1 \<times> s2), measure = joint_distribution X Y\<rparr>"
4.93      (is "finite_measure_space ?M")
4.95    show "positive ?M (measure ?M)"
4.96      unfolding positive_def using positive'[unfolded positive_def] assms sets_eq_Pow
4.97 -    by (simp add: joint_distribution_def)
4.98 +    by (simp add: distribution_def)
4.99
4.101    proof safe
4.102 @@ -406,7 +454,7 @@
4.103      assume "x \<inter> y = {}"
4.105      show "measure ?M (x \<union> y) = measure ?M x + measure ?M y"
4.106 -      apply (simp add: joint_distribution_def)
4.107 +      apply (simp add: distribution_def)
4.108        apply (subst Int_Un_distrib2)
4.109        by auto
4.110    qed
4.111 @@ -418,11 +466,58 @@
4.112      by simp
4.113  qed
4.114
4.115 -lemma (in finite_prob_space) finite_marginal_product_space_POW:
4.116 +lemma (in finite_prob_space) finite_product_measure_space_of_images:
4.117    shows "finite_measure_space \<lparr> space = X ` space M \<times> Y ` space M,
4.118                                  sets = Pow (X ` space M \<times> Y ` space M),
4.119                                  measure = joint_distribution X Y\<rparr>"
4.120      (is "finite_measure_space ?M")
4.121 -  using finite_space by (auto intro!: finite_marginal_product_space_POW2)
4.122 +  using finite_space by (auto intro!: finite_product_measure_space)
4.123 +
4.124 +lemma (in finite_prob_space) finite_measure_space:
4.125 +  shows "finite_measure_space \<lparr> space = X ` space M, sets = Pow (X ` space M), measure = distribution X\<rparr>"
4.126 +    (is "finite_measure_space ?S")
4.128 +  show "finite (X ` space M)" using finite_space by simp
4.129 +
4.130 +  show "positive ?S (distribution X)" unfolding distribution_def
4.131 +    unfolding positive_def using positive'[unfolded positive_def] sets_eq_Pow by auto
4.132 +
4.134 +  proof (simp, safe)
4.135 +    fix x y
4.136 +    have x: "(X -` x) \<inter> space M \<in> sets M"
4.137 +      and y: "(X -` y) \<inter> space M \<in> sets M" using sets_eq_Pow by auto
4.138 +    assume "x \<inter> y = {}"
4.140 +    have "prob (((X -` x) \<union> (X -` y)) \<inter> space M) =
4.141 +      prob ((X -` x) \<inter> space M) + prob ((X -` y) \<inter> space M)"
4.142 +      apply (subst Int_Un_distrib2)
4.143 +      by auto
4.144 +    thus "prob ((X -` x \<union> X -` y) \<inter> space M) = prob (X -` x \<inter> space M) + prob (X -` y \<inter> space M)"
4.145 +      by auto
4.146 +  qed
4.147 +qed
4.148 +
4.149 +lemma (in finite_prob_space) finite_prob_space_of_images:
4.150 +  "finite_prob_space \<lparr> space = X ` space M, sets = Pow (X ` space M), measure = distribution X\<rparr>"
4.151 +  (is "finite_prob_space ?S")
4.152 +proof (simp add: finite_prob_space_eq, safe)
4.153 +  show "finite_measure_space ?S" by (rule finite_measure_space)
4.154 +  have "X -` X ` space M \<inter> space M = space M" by auto
4.155 +  thus "distribution X (X`space M) = 1"
4.156 +    by (simp add: distribution_def prob_space)
4.157 +qed
4.158 +
4.159 +lemma (in finite_prob_space) finite_product_prob_space_of_images:
4.160 +  "finite_prob_space \<lparr> space = X ` space M \<times> Y ` space M, sets = Pow (X ` space M \<times> Y ` space M),
4.161 +    measure = joint_distribution X Y\<rparr>"
4.162 +  (is "finite_prob_space ?S")
4.163 +proof (simp add: finite_prob_space_eq, safe)
4.164 +  show "finite_measure_space ?S" by (rule finite_product_measure_space_of_images)
4.165 +
4.166 +  have "X -` X ` space M \<inter> Y -` Y ` space M \<inter> space M = space M" by auto
4.167 +  thus "joint_distribution X Y (X ` space M \<times> Y ` space M) = 1"
4.168 +    by (simp add: distribution_def prob_space vimage_Times comp_def)
4.169 +qed
4.170
4.171  end
```
```     5.1 --- a/src/HOL/Probability/ex/Dining_Cryptographers.thy	Mon May 03 14:35:10 2010 +0200
5.2 +++ b/src/HOL/Probability/ex/Dining_Cryptographers.thy	Mon May 03 14:35:10 2010 +0200
5.3 @@ -2,10 +2,10 @@
5.4  imports Information
5.5  begin
5.6
5.7 -lemma finite_prob_spaceI:
5.8 -  "\<lbrakk> finite_measure_space M ; measure M (space M) = 1 \<rbrakk> \<Longrightarrow> finite_prob_space M"
5.9 -  unfolding finite_measure_space_def finite_measure_space_axioms_def
5.10 -    finite_prob_space_def prob_space_def prob_space_axioms_def
5.11 +lemma finite_information_spaceI:
5.12 +  "\<lbrakk> finite_measure_space M ; measure M (space M) = 1 ; 1 < b \<rbrakk> \<Longrightarrow> finite_information_space M b"
5.13 +  unfolding finite_information_space_def finite_measure_space_def finite_measure_space_axioms_def
5.14 +    finite_prob_space_def prob_space_def prob_space_axioms_def finite_information_space_axioms_def
5.15    by auto
5.16
5.17  locale finite_space =
5.18 @@ -21,8 +21,8 @@
5.19    and measure_M[simp]: "measure M s = real (card s) / real (card S)"
5.21
5.22 -sublocale finite_space \<subseteq> finite_prob_space M
5.23 -proof (rule finite_prob_spaceI)
5.24 +sublocale finite_space \<subseteq> finite_information_space M 2
5.25 +proof (rule finite_information_spaceI)
5.26    let ?measure = "\<lambda>s::'a set. real (card s) / real (card S)"
5.27
5.28    show "finite_measure_space M"
5.29 @@ -40,9 +40,7 @@
5.30          by (cases "card S = 0") (simp_all add: field_simps)
5.31      qed
5.32    qed
5.33 -
5.34 -  show "measure M (space M) = 1" by simp
5.35 -qed
5.36 +qed simp_all
5.37
5.38  lemma set_of_list_extend:
5.39    "{xs. length xs = Suc n \<and> (\<forall>x\<in>set xs. x \<in> A)} =
5.40 @@ -83,19 +81,6 @@
5.41    and card_list_length: "card {xs. length xs = n \<and> (\<forall>x\<in>set xs. x \<in> A)} = (card A)^n"
5.42    using card_finite_list_length[OF assms, of n] by auto
5.43
5.44 -lemma product_not_empty:
5.45 -  "A \<noteq> {} \<Longrightarrow> B \<noteq> {} \<Longrightarrow> A \<times> B \<noteq> {}"
5.46 -  by auto
5.47 -
5.48 -lemma fst_product[simp]: "fst ` (A \<times> B) = (if B = {} then {} else A)"
5.49 -  by (auto intro!: image_eqI)
5.50 -
5.51 -lemma snd_product[simp]: "snd ` (A \<times> B) = (if A = {} then {} else B)"
5.52 -  by (auto intro!: image_eqI)
5.53 -
5.54 -lemma Ex_eq_length[simp]: "\<exists>xs. length xs = n"
5.55 -  by (rule exI[of _ "replicate n undefined"]) simp
5.56 -
5.57  section "Define the state space"
5.58
5.59  text {*
5.60 @@ -197,10 +182,10 @@
5.61    have *: "{xs. length xs = n} \<noteq> {}"
5.62      by (auto intro!: exI[of _ "replicate n undefined"])
5.63    show ?thesis
5.64 -    unfolding payer_def_raw dc_crypto fst_product if_not_P[OF *] ..
5.65 +    unfolding payer_def_raw dc_crypto fst_image_times if_not_P[OF *] ..
5.66  qed
5.67
5.68 -lemma image_ex1_eq: "inj_on f A \<Longrightarrow> (b \<in> f ` A) = (\<exists>!x \<in> A. b = f x)"
5.69 +lemma image_ex1_eq: "inj_on f A \<Longrightarrow> (b \<in> f ` A) \<longleftrightarrow> (\<exists>!x \<in> A. b = f x)"
5.70    by (unfold inj_on_def) blast
5.71
5.72  lemma Ex1_eq: "\<exists>! x. P x \<Longrightarrow> P x \<Longrightarrow> P y \<Longrightarrow> x = y"
5.73 @@ -495,26 +480,24 @@
5.74    show "finite dc_crypto" using finite_dc_crypto .
5.75    show "dc_crypto \<noteq> {}"
5.76      unfolding dc_crypto
5.77 -    apply (rule product_not_empty)
5.78      using n_gt_3 by (auto intro: exI[of _ "replicate n True"])
5.79  qed
5.80
5.81  notation (in dining_cryptographers_space)
5.82 -  finite_mutual_information_2 ("\<I>'( _ ; _ ')")
5.83 +  finite_mutual_information ("\<I>'( _ ; _ ')")
5.84
5.85  notation (in dining_cryptographers_space)
5.86 -  finite_entropy_2 ("\<H>'( _ ')")
5.87 +  finite_entropy ("\<H>'( _ ')")
5.88
5.89  notation (in dining_cryptographers_space)
5.90 -  finite_conditional_entropy_2 ("\<H>'( _ | _ ')")
5.91 +  finite_conditional_entropy ("\<H>'( _ | _ ')")
5.92
5.93  theorem (in dining_cryptographers_space)
5.94    "\<I>( inversion ; payer ) = 0"
5.95  proof -
5.96 -  have b: "1 < (2 :: real)" by simp
5.97    have n: "0 < n" using n_gt_3 by auto
5.98
5.99 -  have lists: "{xs. length xs = n} \<noteq> {}" by auto
5.100 +  have lists: "{xs. length xs = n} \<noteq> {}" using Ex_list_of_length by auto
5.101
5.102    have card_image_inversion:
5.103      "real (card (inversion ` dc_crypto)) = 2^n / 2"
5.104 @@ -526,7 +509,7 @@
5.105
5.106    { have "\<H>(inversion|payer) =
5.107          - (\<Sum>x\<in>inversion`dc_crypto. (\<Sum>z\<in>Some ` {0..<n}. ?dIP {(x, z)} * log 2 (?dIP {(x, z)} / ?dP {z})))"
5.108 -      unfolding finite_conditional_entropy_reduce[OF b] joint_distribution
5.109 +      unfolding conditional_entropy_eq
5.110        by (simp add: image_payer_dc_crypto setsum_Sigma)
5.111      also have "... =
5.112          - (\<Sum>x\<in>inversion`dc_crypto. (\<Sum>z\<in>Some ` {0..<n}. 2 / (real n * 2^n) * (1 - real n)))"
5.113 @@ -560,7 +543,7 @@
5.114      finally have "\<H>(inversion|payer) = real n - 1" . }
5.115    moreover
5.116    { have "\<H>(inversion) = - (\<Sum>x \<in> inversion`dc_crypto. ?dI {x} * log 2 (?dI {x}))"
5.117 -      unfolding finite_entropy_reduce[OF b] by simp
5.118 +      unfolding entropy_eq by simp
5.119      also have "... = - (\<Sum>x \<in> inversion`dc_crypto. 2 * (1 - real n) / 2^n)"
5.120        unfolding neg_equal_iff_equal
5.121      proof (rule setsum_cong[OF refl])
5.122 @@ -577,7 +560,7 @@
5.123      finally have "\<H>(inversion) = real n - 1" .
5.124    }
5.125    ultimately show ?thesis
5.126 -    unfolding finite_mutual_information_eq_entropy_conditional_entropy[OF b]
5.127 +    unfolding mutual_information_eq_entropy_conditional_entropy
5.128      by simp
5.129  qed
5.130
```