author hoelzl Thu Sep 02 19:51:53 2010 +0200 (2010-09-02) changeset 39097 943c7b348524 parent 39096 111756225292 child 39098 21e9bd6cf0a8
Moved lemmas to appropriate locations
```     1.1 --- a/src/HOL/Probability/Information.thy	Thu Sep 02 17:28:00 2010 +0200
1.2 +++ b/src/HOL/Probability/Information.thy	Thu Sep 02 19:51:53 2010 +0200
1.3 @@ -2,11 +2,53 @@
1.4  imports Probability_Space Product_Measure Convex Radon_Nikodym
1.5  begin
1.6
1.7 +lemma log_le: "1 < a \<Longrightarrow> 0 < x \<Longrightarrow> x \<le> y \<Longrightarrow> log a x \<le> log a y"
1.8 +  by (subst log_le_cancel_iff) auto
1.9 +
1.10 +lemma log_less: "1 < a \<Longrightarrow> 0 < x \<Longrightarrow> x < y \<Longrightarrow> log a x < log a y"
1.11 +  by (subst log_less_cancel_iff) auto
1.12 +
1.13 +lemma setsum_cartesian_product':
1.14 +  "(\<Sum>x\<in>A \<times> B. f x) = (\<Sum>x\<in>A. setsum (\<lambda>y. f (x, y)) B)"
1.15 +  unfolding setsum_cartesian_product by simp
1.16 +
1.17  lemma real_of_pinfreal_inverse[simp]:
1.18    fixes X :: pinfreal
1.19    shows "real (inverse X) = 1 / real X"
1.20    by (cases X) (auto simp: inverse_eq_divide)
1.21
1.22 +lemma (in finite_prob_space) finite_product_prob_space_of_images:
1.23 +  "finite_prob_space \<lparr> space = X ` space M \<times> Y ` space M, sets = Pow (X ` space M \<times> Y ` space M)\<rparr>
1.24 +                     (joint_distribution X Y)"
1.25 +  (is "finite_prob_space ?S _")
1.26 +proof (simp add: finite_prob_space_eq finite_product_measure_space_of_images)
1.27 +  have "X -` X ` space M \<inter> Y -` Y ` space M \<inter> space M = space M" by auto
1.28 +  thus "joint_distribution X Y (X ` space M \<times> Y ` space M) = 1"
1.29 +    by (simp add: distribution_def prob_space vimage_Times comp_def measure_space_1)
1.30 +qed
1.31 +
1.32 +lemma (in finite_prob_space) finite_measure_space_prod:
1.33 +  assumes X: "finite_measure_space MX (distribution X)"
1.34 +  assumes Y: "finite_measure_space MY (distribution Y)"
1.35 +  shows "finite_measure_space (prod_measure_space MX MY) (joint_distribution X Y)"
1.36 +    (is "finite_measure_space ?M ?D")
1.37 +proof (intro finite_measure_spaceI)
1.38 +  interpret X: finite_measure_space MX "distribution X" by fact
1.39 +  interpret Y: finite_measure_space MY "distribution Y" by fact
1.40 +  note finite_measure_space.finite_prod_measure_space[OF X Y, simp]
1.41 +  show "finite (space ?M)" using X.finite_space Y.finite_space by auto
1.42 +  show "joint_distribution X Y {} = 0" by simp
1.43 +  show "sets ?M = Pow (space ?M)" by simp
1.44 +  { fix x show "?D (space ?M) \<noteq> \<omega>" by (rule distribution_finite) }
1.45 +  { fix A B assume "A \<subseteq> space ?M" "B \<subseteq> space ?M" "A \<inter> B = {}"
1.46 +    have *: "(\<lambda>t. (X t, Y t)) -` (A \<union> B) \<inter> space M =
1.47 +             (\<lambda>t. (X t, Y t)) -` A \<inter> space M \<union> (\<lambda>t. (X t, Y t)) -` B \<inter> space M"
1.48 +      by auto
1.49 +    show "?D (A \<union> B) = ?D A + ?D B" unfolding distribution_def *
1.50 +      apply (rule measure_additive[symmetric])
1.51 +      using `A \<inter> B = {}` by (auto simp: sets_eq_Pow) }
1.52 +qed
1.53 +
1.54  section "Convex theory"
1.55
1.56  lemma log_setsum:
1.57 @@ -105,51 +147,19 @@
1.58    finally show ?thesis .
1.59  qed
1.60
1.61 -lemma (in finite_prob_space) sum_over_space_distrib:
1.62 -  "(\<Sum>x\<in>X`space M. distribution X {x}) = 1"
1.63 -  unfolding distribution_def measure_space_1[symmetric] using finite_space
1.64 -  by (subst measure_finitely_additive'')
1.65 -     (auto simp add: disjoint_family_on_def sets_eq_Pow intro!: arg_cong[where f=\<mu>])
1.66 -
1.67 -lemma (in finite_prob_space) sum_over_space_real_distribution:
1.68 -  "(\<Sum>x\<in>X`space M. real (distribution X {x})) = 1"
1.69 -  unfolding distribution_def prob_space[symmetric] using finite_space
1.70 -  by (subst real_finite_measure_finite_Union[symmetric])
1.71 -     (auto simp add: disjoint_family_on_def sets_eq_Pow intro!: arg_cong[where f=prob])
1.72 +lemma split_pairs:
1.73 +  shows
1.74 +    "((A, B) = X) \<longleftrightarrow> (fst X = A \<and> snd X = B)" and
1.75 +    "(X = (A, B)) \<longleftrightarrow> (fst X = A \<and> snd X = B)" by auto
1.76
1.77  section "Information theory"
1.78
1.79 -definition
1.80 -  "KL_divergence b M \<mu> \<nu> =
1.81 -    measure_space.integral M \<mu> (\<lambda>x. log b (real (sigma_finite_measure.RN_deriv M \<nu> \<mu> x)))"
1.82 -
1.83  locale finite_information_space = finite_prob_space +
1.84    fixes b :: real assumes b_gt_1: "1 < b"
1.85
1.86 -lemma (in finite_prob_space) distribution_mono:
1.87 -  assumes "\<And>t. \<lbrakk> t \<in> space M ; X t \<in> x \<rbrakk> \<Longrightarrow> Y t \<in> y"
1.88 -  shows "distribution X x \<le> distribution Y y"
1.89 -  unfolding distribution_def
1.90 -  using assms by (auto simp: sets_eq_Pow intro!: measure_mono)
1.91 -
1.92 -lemma (in prob_space) distribution_remove_const:
1.93 -  shows "joint_distribution X (\<lambda>x. ()) {(x, ())} = distribution X {x}"
1.94 -  and "joint_distribution (\<lambda>x. ()) X {((), x)} = distribution X {x}"
1.95 -  and "joint_distribution X (\<lambda>x. (Y x, ())) {(x, y, ())} = joint_distribution X Y {(x, y)}"
1.96 -  and "joint_distribution X (\<lambda>x. ((), Y x)) {(x, (), y)} = joint_distribution X Y {(x, y)}"
1.97 -  and "distribution (\<lambda>x. ()) {()} = 1"
1.98 -  unfolding measure_space_1[symmetric]
1.99 -  by (auto intro!: arg_cong[where f="\<mu>"] simp: distribution_def)
1.100 -
1.101  context finite_information_space
1.102  begin
1.103
1.104 -lemma distribution_mono_gt_0:
1.105 -  assumes gt_0: "0 < distribution X x"
1.106 -  assumes *: "\<And>t. \<lbrakk> t \<in> space M ; X t \<in> x \<rbrakk> \<Longrightarrow> Y t \<in> y"
1.107 -  shows "0 < distribution Y y"
1.108 -  by (rule less_le_trans[OF gt_0 distribution_mono]) (rule *)
1.109 -
1.110  lemma
1.111    assumes "0 \<le> A" and pos: "0 < A \<Longrightarrow> 0 < B" "0 < A \<Longrightarrow> 0 < C"
1.112    shows mult_log_mult: "A * log b (B * C) = A * log b B + A * log b C" (is "?mult")
1.113 @@ -165,41 +175,6 @@
1.114    thus ?mult and ?div by auto
1.115  qed
1.116
1.117 -lemma split_pairs:
1.118 -  shows
1.119 -    "((A, B) = X) \<longleftrightarrow> (fst X = A \<and> snd X = B)" and
1.120 -    "(X = (A, B)) \<longleftrightarrow> (fst X = A \<and> snd X = B)" by auto
1.121 -
1.122 -lemma (in finite_information_space) distribution_finite:
1.123 -  "distribution X A \<noteq> \<omega>"
1.124 -  using measure_finite[of "X -` A \<inter> space M"]
1.125 -  unfolding distribution_def sets_eq_Pow by auto
1.126 -
1.127 -lemma (in finite_information_space) real_distribution_gt_0[simp]:
1.128 -  "0 < real (distribution Y y) \<longleftrightarrow>  0 < distribution Y y"
1.129 -  using assms by (auto intro!: real_pinfreal_pos distribution_finite)
1.130 -
1.131 -lemma real_distribution_mult_pos_pos:
1.132 -  assumes "0 < distribution Y y"
1.133 -  and "0 < distribution X x"
1.134 -  shows "0 < real (distribution Y y * distribution X x)"
1.135 -  unfolding real_of_pinfreal_mult[symmetric]
1.136 -  using assms by (auto intro!: mult_pos_pos)
1.137 -
1.138 -lemma real_distribution_divide_pos_pos:
1.139 -  assumes "0 < distribution Y y"
1.140 -  and "0 < distribution X x"
1.141 -  shows "0 < real (distribution Y y / distribution X x)"
1.142 -  unfolding divide_pinfreal_def real_of_pinfreal_mult[symmetric]
1.143 -  using assms distribution_finite[of X x] by (cases "distribution X x") (auto intro!: mult_pos_pos)
1.144 -
1.145 -lemma real_distribution_mult_inverse_pos_pos:
1.146 -  assumes "0 < distribution Y y"
1.147 -  and "0 < distribution X x"
1.148 -  shows "0 < real (distribution Y y * inverse (distribution X x))"
1.149 -  unfolding divide_pinfreal_def real_of_pinfreal_mult[symmetric]
1.150 -  using assms distribution_finite[of X x] by (cases "distribution X x") (auto intro!: mult_pos_pos)
1.151 -
1.152  ML {*
1.153
1.154    (* tactic to solve equations of the form @{term "W * log b (X / (Y * Z)) = W * log b X - W * log b (Y * Z)"}
1.155 @@ -252,31 +227,14 @@
1.156
1.157  end
1.158
1.159 -lemma (in finite_measure_space) absolutely_continuousI:
1.160 -  assumes "finite_measure_space M \<nu>"
1.161 -  assumes v: "\<And>x. \<lbrakk> x \<in> space M ; \<mu> {x} = 0 \<rbrakk> \<Longrightarrow> \<nu> {x} = 0"
1.162 -  shows "absolutely_continuous \<nu>"
1.163 -proof (unfold absolutely_continuous_def sets_eq_Pow, safe)
1.164 -  fix N assume "\<mu> N = 0" "N \<subseteq> space M"
1.165 -
1.166 -  interpret v: finite_measure_space M \<nu> by fact
1.167 +subsection "Kullback\$-\$Leibler divergence"
1.168
1.169 -  have "\<nu> N = \<nu> (\<Union>x\<in>N. {x})" by simp
1.170 -  also have "\<dots> = (\<Sum>x\<in>N. \<nu> {x})"
1.171 -  proof (rule v.measure_finitely_additive''[symmetric])
1.172 -    show "finite N" using `N \<subseteq> space M` finite_space by (auto intro: finite_subset)
1.173 -    show "disjoint_family_on (\<lambda>i. {i}) N" unfolding disjoint_family_on_def by auto
1.174 -    fix x assume "x \<in> N" thus "{x} \<in> sets M" using `N \<subseteq> space M` sets_eq_Pow by auto
1.175 -  qed
1.176 -  also have "\<dots> = 0"
1.177 -  proof (safe intro!: setsum_0')
1.178 -    fix x assume "x \<in> N"
1.179 -    hence "\<mu> {x} \<le> \<mu> N" using sets_eq_Pow `N \<subseteq> space M` by (auto intro!: measure_mono)
1.180 -    hence "\<mu> {x} = 0" using `\<mu> N = 0` by simp
1.181 -    thus "\<nu> {x} = 0" using v[of x] `x \<in> N` `N \<subseteq> space M` by auto
1.182 -  qed
1.183 -  finally show "\<nu> N = 0" .
1.184 -qed
1.185 +text {* The Kullback\$-\$Leibler divergence is also known as relative entropy or
1.186 +Kullback\$-\$Leibler distance. *}
1.187 +
1.188 +definition
1.189 +  "KL_divergence b M \<mu> \<nu> =
1.190 +    measure_space.integral M \<mu> (\<lambda>x. log b (real (sigma_finite_measure.RN_deriv M \<nu> \<mu> x)))"
1.191
1.192  lemma (in finite_measure_space) KL_divergence_eq_finite:
1.193    assumes v: "finite_measure_space M \<nu>"
1.194 @@ -285,19 +243,13 @@
1.195  proof (simp add: KL_divergence_def finite_measure_space.integral_finite_singleton[OF v])
1.196    interpret v: finite_measure_space M \<nu> by fact
1.197    have ms: "measure_space M \<nu>" by fact
1.198 -
1.199    have ac: "absolutely_continuous \<nu>"
1.200      using ac by (auto intro!: absolutely_continuousI[OF v])
1.201 -
1.202    show "(\<Sum>x \<in> space M. log b (real (RN_deriv \<nu> x)) * real (\<nu> {x})) = ?sum"
1.203      using RN_deriv_finite_measure[OF ms ac]
1.204      by (auto intro!: setsum_cong simp: field_simps real_of_pinfreal_mult[symmetric])
1.205  qed
1.206
1.207 -lemma (in finite_prob_space) finite_sum_over_space_eq_1:
1.208 -  "(\<Sum>x\<in>space M. real (\<mu> {x})) = 1"
1.209 -  using sum_over_space_eq_1 finite_measure by (simp add: real_of_pinfreal_setsum sets_eq_Pow)
1.210 -
1.211  lemma (in finite_prob_space) KL_divergence_positive_finite:
1.212    assumes v: "finite_prob_space M \<nu>"
1.213    assumes ac: "\<And>x. \<lbrakk> x \<in> space M ; \<mu> {x} = 0 \<rbrakk> \<Longrightarrow> \<nu> {x} = 0"
1.214 @@ -322,13 +274,15 @@
1.215        fix x assume x: "x \<in> space M"
1.216        { assume "0 < real (\<nu> {x})"
1.217          hence "\<mu> {x} \<noteq> 0" using ac[OF x] by auto
1.218 -        thus "0 < prob {x}" using measure_finite[of "{x}"] sets_eq_Pow x
1.219 +        thus "0 < prob {x}" using finite_measure[of "{x}"] sets_eq_Pow x
1.220            by (cases "\<mu> {x}") simp_all }
1.221      qed auto
1.222    qed
1.223    thus "0 \<le> KL_divergence b M \<nu> \<mu>" using finite_sum_over_space_eq_1 by simp
1.224  qed
1.225
1.226 +subsection {* Mutual Information *}
1.227 +
1.228  definition (in prob_space)
1.229    "mutual_information b S T X Y =
1.230      KL_divergence b (prod_measure_space S T)
1.231 @@ -341,24 +295,6 @@
1.232      \<lparr> space = X`space M, sets = Pow (X`space M) \<rparr>
1.233      \<lparr> space = Y`space M, sets = Pow (Y`space M) \<rparr> X Y"
1.234
1.235 -lemma prod_measure_times_finite:
1.236 -  assumes fms: "finite_measure_space M \<mu>" "finite_measure_space N \<nu>" and a: "a \<in> space M \<times> space N"
1.237 -  shows "prod_measure M \<mu> N \<nu> {a} = \<mu> {fst a} * \<nu> {snd a}"
1.238 -proof (cases a)
1.239 -  case (Pair b c)
1.240 -  hence a_eq: "{a} = {b} \<times> {c}" by simp
1.241 -
1.242 -  interpret M: finite_measure_space M \<mu> by fact
1.243 -  interpret N: finite_measure_space N \<nu> by fact
1.244 -
1.245 -  from finite_measure_space.finite_prod_measure_times[OF fms, of "{b}" "{c}"] M.sets_eq_Pow N.sets_eq_Pow a Pair
1.246 -  show ?thesis unfolding a_eq by simp
1.247 -qed
1.248 -
1.249 -lemma setsum_cartesian_product':
1.250 -  "(\<Sum>x\<in>A \<times> B. f x) = (\<Sum>x\<in>A. setsum (\<lambda>y. f (x, y)) B)"
1.251 -  unfolding setsum_cartesian_product by simp
1.252 -
1.253  lemma (in finite_information_space) mutual_information_generic_eq:
1.254    assumes MX: "finite_measure_space MX (distribution X)"
1.255    assumes MY: "finite_measure_space MY (distribution Y)"
1.256 @@ -478,9 +414,26 @@
1.257                                                     (real (distribution X {x}) * real (distribution Y {y}))))"
1.258    by (subst mutual_information_eq_generic) (simp_all add: finite_prob_space_of_images)
1.259
1.260 +lemma (in finite_information_space) mutual_information_cong:
1.261 +  assumes X: "\<And>x. x \<in> space M \<Longrightarrow> X x = X' x"
1.262 +  assumes Y: "\<And>x. x \<in> space M \<Longrightarrow> Y x = Y' x"
1.263 +  shows "\<I>(X ; Y) = \<I>(X' ; Y')"
1.264 +proof -
1.265 +  have "X ` space M = X' ` space M" using X by (auto intro!: image_eqI)
1.266 +  moreover have "Y ` space M = Y' ` space M" using Y by (auto intro!: image_eqI)
1.267 +  ultimately show ?thesis
1.268 +  unfolding mutual_information_eq
1.269 +    using
1.270 +      assms[THEN distribution_cong]
1.271 +      joint_distribution_cong[OF assms]
1.272 +    by (auto intro!: setsum_cong)
1.273 +qed
1.274 +
1.275  lemma (in finite_information_space) mutual_information_positive: "0 \<le> \<I>(X;Y)"
1.276    by (subst mutual_information_positive_generic) (simp_all add: finite_prob_space_of_images)
1.277
1.278 +subsection {* Entropy *}
1.279 +
1.280  definition (in prob_space)
1.281    "entropy b s X = mutual_information b s s X X"
1.282
1.283 @@ -488,32 +441,146 @@
1.284    finite_entropy ("\<H>'(_')") where
1.285    "\<H>(X) \<equiv> entropy b \<lparr> space = X`space M, sets = Pow (X`space M) \<rparr> X"
1.286
1.287 -lemma (in finite_information_space) joint_distribution_remove[simp]:
1.288 -    "joint_distribution X X {(x, x)} = distribution X {x}"
1.289 -  unfolding distribution_def by (auto intro!: arg_cong[where f="\<mu>"])
1.290 +lemma (in finite_information_space) entropy_generic_eq:
1.291 +  assumes MX: "finite_measure_space MX (distribution X)"
1.292 +  shows "entropy b MX X = -(\<Sum> x \<in> space MX. real (distribution X {x}) * log b (real (distribution X {x})))"
1.293 +proof -
1.294 +  let "?X x" = "real (distribution X {x})"
1.295 +  let "?XX x y" = "real (joint_distribution X X {(x, y)})"
1.296 +  interpret MX: finite_measure_space MX "distribution X" by fact
1.297 +  { fix x y
1.298 +    have "(\<lambda>x. (X x, X x)) -` {(x, y)} = (if x = y then X -` {x} else {})" by auto
1.299 +    then have "?XX x y * log b (?XX x y / (?X x * ?X y)) =
1.300 +        (if x = y then - ?X y * log b (?X y) else 0)"
1.301 +      unfolding distribution_def by (auto simp: mult_log_divide) }
1.302 +  note remove_XX = this
1.303 +  show ?thesis
1.304 +    unfolding entropy_def mutual_information_generic_eq[OF MX MX]
1.305 +    unfolding setsum_cartesian_product[symmetric] setsum_negf[symmetric] remove_XX
1.306 +    by (auto simp: setsum_cases MX.finite_space)
1.307 +qed
1.308
1.309  lemma (in finite_information_space) entropy_eq:
1.310    "\<H>(X) = -(\<Sum> x \<in> X ` space M. real (distribution X {x}) * log b (real (distribution X {x})))"
1.311 -proof -
1.312 -  { fix f
1.313 -    { fix x y
1.314 -      have "(\<lambda>x. (X x, X x)) -` {(x, y)} = (if x = y then X -` {x} else {})" by auto
1.315 -        hence "real (distribution (\<lambda>x. (X x, X x))  {(x,y)}) * f x y =
1.316 -            (if x = y then real (distribution X {x}) * f x y else 0)"
1.317 -        unfolding distribution_def by auto }
1.318 -    hence "(\<Sum>(x, y) \<in> X ` space M \<times> X ` space M. real (joint_distribution X X {(x, y)}) * f x y) =
1.319 -      (\<Sum>x \<in> X ` space M. real (distribution X {x}) * f x x)"
1.320 -      unfolding setsum_cartesian_product' by (simp add: setsum_cases finite_space) }
1.321 -  note remove_cartesian_product = this
1.322 -
1.323 -  show ?thesis
1.324 -    unfolding entropy_def mutual_information_eq setsum_negf[symmetric] remove_cartesian_product
1.325 -    by (auto intro!: setsum_cong)
1.326 -qed
1.327 +  by (simp add: finite_measure_space entropy_generic_eq)
1.328
1.329  lemma (in finite_information_space) entropy_positive: "0 \<le> \<H>(X)"
1.330    unfolding entropy_def using mutual_information_positive .
1.331
1.332 +lemma (in finite_information_space) entropy_certainty_eq_0:
1.333 +  assumes "x \<in> X ` space M" and "distribution X {x} = 1"
1.334 +  shows "\<H>(X) = 0"
1.335 +proof -
1.336 +  interpret X: finite_prob_space "\<lparr> space = X ` space M, sets = Pow (X ` space M) \<rparr>" "distribution X"
1.337 +    by (rule finite_prob_space_of_images)
1.338 +
1.339 +  have "distribution X (X ` space M - {x}) = distribution X (X ` space M) - distribution X {x}"
1.340 +    using X.measure_compl[of "{x}"] assms by auto
1.341 +  also have "\<dots> = 0" using X.prob_space assms by auto
1.342 +  finally have X0: "distribution X (X ` space M - {x}) = 0" by auto
1.343 +
1.344 +  { fix y assume asm: "y \<noteq> x" "y \<in> X ` space M"
1.345 +    hence "{y} \<subseteq> X ` space M - {x}" by auto
1.346 +    from X.measure_mono[OF this] X0 asm
1.347 +    have "distribution X {y} = 0" by auto }
1.348 +
1.349 +  hence fi: "\<And> y. y \<in> X ` space M \<Longrightarrow> real (distribution X {y}) = (if x = y then 1 else 0)"
1.350 +    using assms by auto
1.351 +
1.352 +  have y: "\<And>y. (if x = y then 1 else 0) * log b (if x = y then 1 else 0) = 0" by simp
1.353 +
1.354 +  show ?thesis unfolding entropy_eq by (auto simp: y fi)
1.355 +qed
1.356 +
1.357 +lemma (in finite_information_space) entropy_le_card_not_0:
1.358 +  "\<H>(X) \<le> log b (real (card (X ` space M \<inter> {x . distribution X {x} \<noteq> 0})))"
1.359 +proof -
1.360 +  let "?d x" = "distribution X {x}"
1.361 +  let "?p x" = "real (?d x)"
1.362 +  have "\<H>(X) = (\<Sum>x\<in>X`space M. ?p x * log b (1 / ?p x))"
1.363 +    by (auto intro!: setsum_cong simp: entropy_eq setsum_negf[symmetric])
1.364 +  also have "\<dots> \<le> log b (\<Sum>x\<in>X`space M. ?p x * (1 / ?p x))"
1.365 +    apply (rule log_setsum')
1.366 +    using not_empty b_gt_1 finite_space sum_over_space_real_distribution
1.367 +    by auto
1.368 +  also have "\<dots> = log b (\<Sum>x\<in>X`space M. if ?d x \<noteq> 0 then 1 else 0)"
1.369 +    apply (rule arg_cong[where f="\<lambda>f. log b (\<Sum>x\<in>X`space M. f x)"])
1.370 +    using distribution_finite[of X] by (auto simp: expand_fun_eq real_of_pinfreal_eq_0)
1.371 +  finally show ?thesis
1.372 +    using finite_space by (auto simp: setsum_cases real_eq_of_nat)
1.373 +qed
1.374 +
1.375 +lemma (in finite_information_space) entropy_uniform_max:
1.376 +  assumes "\<And>x y. \<lbrakk> x \<in> X ` space M ; y \<in> X ` space M \<rbrakk> \<Longrightarrow> distribution X {x} = distribution X {y}"
1.377 +  shows "\<H>(X) = log b (real (card (X ` space M)))"
1.378 +proof -
1.379 +  note uniform =
1.380 +    finite_prob_space_of_images[of X, THEN finite_prob_space.uniform_prob, simplified]
1.381 +
1.382 +  have card_gt0: "0 < card (X ` space M)" unfolding card_gt_0_iff
1.383 +    using finite_space not_empty by auto
1.384 +
1.385 +  { fix x assume "x \<in> X ` space M"
1.386 +    hence "real (distribution X {x}) = 1 / real (card (X ` space M))"
1.387 +    proof (rule uniform)
1.388 +      fix x y assume "x \<in> X`space M" "y \<in> X`space M"
1.389 +      from assms[OF this] show "real (distribution X {x}) = real (distribution X {y})" by simp
1.390 +    qed }
1.391 +  thus ?thesis
1.392 +    using not_empty finite_space b_gt_1 card_gt0
1.393 +    by (simp add: entropy_eq real_eq_of_nat[symmetric] log_divide)
1.394 +qed
1.395 +
1.396 +lemma (in finite_information_space) entropy_le_card:
1.397 +  "\<H>(X) \<le> log b (real (card (X ` space M)))"
1.398 +proof cases
1.399 +  assume "X ` space M \<inter> {x. distribution X {x} \<noteq> 0} = {}"
1.400 +  then have "\<And>x. x\<in>X`space M \<Longrightarrow> distribution X {x} = 0" by auto
1.401 +  moreover
1.402 +  have "0 < card (X`space M)"
1.403 +    using finite_space not_empty unfolding card_gt_0_iff by auto
1.404 +  then have "log b 1 \<le> log b (real (card (X`space M)))"
1.405 +    using b_gt_1 by (intro log_le) auto
1.406 +  ultimately show ?thesis unfolding entropy_eq by simp
1.407 +next
1.408 +  assume False: "X ` space M \<inter> {x. distribution X {x} \<noteq> 0} \<noteq> {}"
1.409 +  have "card (X ` space M \<inter> {x. distribution X {x} \<noteq> 0}) \<le> card (X ` space M)"
1.410 +    (is "?A \<le> ?B") using finite_space not_empty by (auto intro!: card_mono)
1.411 +  note entropy_le_card_not_0
1.412 +  also have "log b (real ?A) \<le> log b (real ?B)"
1.413 +    using b_gt_1 False finite_space not_empty `?A \<le> ?B`
1.414 +    by (auto intro!: log_le simp: card_gt_0_iff)
1.415 +  finally show ?thesis .
1.416 +qed
1.417 +
1.418 +lemma (in finite_information_space) entropy_commute:
1.419 +  "\<H>(\<lambda>x. (X x, Y x)) = \<H>(\<lambda>x. (Y x, X x))"
1.420 +proof -
1.421 +  have *: "(\<lambda>x. (Y x, X x))`space M = (\<lambda>(a,b). (b,a))`(\<lambda>x. (X x, Y x))`space M"
1.422 +    by auto
1.423 +  have inj: "\<And>X. inj_on (\<lambda>(a,b). (b,a)) X"
1.424 +    by (auto intro!: inj_onI)
1.425 +  show ?thesis
1.426 +    unfolding entropy_eq unfolding * setsum_reindex[OF inj]
1.427 +    by (simp add: joint_distribution_commute[of Y X] split_beta)
1.428 +qed
1.429 +
1.430 +lemma (in finite_information_space) entropy_eq_cartesian_sum:
1.431 +  "\<H>(\<lambda>x. (X x, Y x)) = -(\<Sum>x\<in>X`space M. \<Sum>y\<in>Y`space M.
1.432 +    real (joint_distribution X Y {(x,y)}) *
1.433 +    log b (real (joint_distribution X Y {(x,y)})))"
1.434 +proof -
1.435 +  { fix x assume "x\<notin>(\<lambda>x. (X x, Y x))`space M"
1.436 +    then have "(\<lambda>x. (X x, Y x)) -` {x} \<inter> space M = {}" by auto
1.437 +    then have "joint_distribution X Y {x} = 0"
1.438 +      unfolding distribution_def by auto }
1.439 +  then show ?thesis using finite_space
1.440 +    unfolding entropy_eq neg_equal_iff_equal setsum_cartesian_product
1.441 +    by (auto intro!: setsum_mono_zero_cong_left)
1.442 +qed
1.443 +
1.444 +subsection {* Conditional Mutual Information *}
1.445 +
1.446  definition (in prob_space)
1.447    "conditional_mutual_information b M1 M2 M3 X Y Z \<equiv>
1.448      mutual_information b M1 (prod_measure_space M2 M3) X (\<lambda>x. (Y x, Z x)) -
1.449 @@ -527,87 +594,32 @@
1.450      \<lparr> space = Z`space M, sets = Pow (Z`space M) \<rparr>
1.451      X Y Z"
1.452
1.453 -lemma (in finite_information_space) setsum_distribution_gen:
1.454 -  assumes "Z -` {c} \<inter> space M = (\<Union>x \<in> X`space M. Y -` {f x}) \<inter> space M"
1.455 -  and "inj_on f (X`space M)"
1.456 -  shows "(\<Sum>x \<in> X`space M. distribution Y {f x}) = distribution Z {c}"
1.457 -  unfolding distribution_def assms
1.458 -  using finite_space assms
1.459 -  by (subst measure_finitely_additive'')
1.460 -     (auto simp add: disjoint_family_on_def sets_eq_Pow inj_on_def
1.461 -      intro!: arg_cong[where f=prob])
1.462 -
1.463 -lemma (in finite_information_space) setsum_distribution:
1.464 -  "(\<Sum>x \<in> X`space M. joint_distribution X Y {(x, y)}) = distribution Y {y}"
1.465 -  "(\<Sum>y \<in> Y`space M. joint_distribution X Y {(x, y)}) = distribution X {x}"
1.466 -  "(\<Sum>x \<in> X`space M. joint_distribution X (\<lambda>x. (Y x, Z x)) {(x, y, z)}) = joint_distribution Y Z {(y, z)}"
1.467 -  "(\<Sum>y \<in> Y`space M. joint_distribution X (\<lambda>x. (Y x, Z x)) {(x, y, z)}) = joint_distribution X Z {(x, z)}"
1.468 -  "(\<Sum>z \<in> Z`space M. joint_distribution X (\<lambda>x. (Y x, Z x)) {(x, y, z)}) = joint_distribution X Y {(x, y)}"
1.469 -  by (auto intro!: inj_onI setsum_distribution_gen)
1.470 -
1.471 -lemma (in finite_information_space) setsum_real_distribution_gen:
1.472 -  assumes "Z -` {c} \<inter> space M = (\<Union>x \<in> X`space M. Y -` {f x}) \<inter> space M"
1.473 -  and "inj_on f (X`space M)"
1.474 -  shows "(\<Sum>x \<in> X`space M. real (distribution Y {f x})) = real (distribution Z {c})"
1.475 -  unfolding distribution_def assms
1.476 -  using finite_space assms
1.477 -  by (subst real_finite_measure_finite_Union[symmetric])
1.478 -     (auto simp add: disjoint_family_on_def sets_eq_Pow inj_on_def
1.479 -        intro!: arg_cong[where f=prob])
1.480 -
1.481 -lemma (in finite_information_space) setsum_real_distribution:
1.482 -  "(\<Sum>x \<in> X`space M. real (joint_distribution X Y {(x, y)})) = real (distribution Y {y})"
1.483 -  "(\<Sum>y \<in> Y`space M. real (joint_distribution X Y {(x, y)})) = real (distribution X {x})"
1.484 -  "(\<Sum>x \<in> X`space M. real (joint_distribution X (\<lambda>x. (Y x, Z x)) {(x, y, z)})) = real (joint_distribution Y Z {(y, z)})"
1.485 -  "(\<Sum>y \<in> Y`space M. real (joint_distribution X (\<lambda>x. (Y x, Z x)) {(x, y, z)})) = real (joint_distribution X Z {(x, z)})"
1.486 -  "(\<Sum>z \<in> Z`space M. real (joint_distribution X (\<lambda>x. (Y x, Z x)) {(x, y, z)})) = real (joint_distribution X Y {(x, y)})"
1.487 -  by (auto intro!: inj_onI setsum_real_distribution_gen)
1.488 +lemma (in finite_information_space) conditional_mutual_information_generic_eq:
1.489 +  assumes MX: "finite_measure_space MX (distribution X)"
1.490 +  assumes MY: "finite_measure_space MY (distribution Y)"
1.491 +  assumes MZ: "finite_measure_space MZ (distribution Z)"
1.492 +  shows "conditional_mutual_information b MX MY MZ X Y Z =
1.493 +    (\<Sum>(x, y, z)\<in>space MX \<times> space MY \<times> space MZ.
1.494 +      real (joint_distribution X (\<lambda>x. (Y x, Z x)) {(x, y, z)}) *
1.495 +      log b (real (joint_distribution X (\<lambda>x. (Y x, Z x)) {(x, y, z)}) /
1.496 +                   (real (distribution X {x}) * real (joint_distribution Y Z {(y, z)})))) -
1.497 +    (\<Sum>(x, y)\<in>space MX \<times> space MZ.
1.498 +      real (joint_distribution X Z {(x, y)}) *
1.499 +      log b (real (joint_distribution X Z {(x, y)}) / (real (distribution X {x}) * real (distribution Z {y}))))"
1.500 +  using assms finite_measure_space_prod[OF MY MZ]
1.501 +  unfolding conditional_mutual_information_def
1.502 +  by (subst (1 2) mutual_information_generic_eq)
1.503 +     (simp_all add: setsum_cartesian_product' finite_measure_space.finite_prod_measure_space)
1.504
1.505 -lemma (in finite_information_space) conditional_mutual_information_eq_sum:
1.506 -   "\<I>(X ; Y | Z) =
1.507 -     (\<Sum>(x, y, z)\<in>X ` space M \<times> (\<lambda>x. (Y x, Z x)) ` space M.
1.508 -             real (distribution (\<lambda>x. (X x, Y x, Z x)) {(x, y, z)}) *
1.509 -             log b (real (distribution (\<lambda>x. (X x, Y x, Z x)) {(x, y, z)})/
1.510 -        real (distribution (\<lambda>x. (Y x, Z x)) {(y, z)}))) -
1.511 -     (\<Sum>(x, z)\<in>X ` space M \<times> Z ` space M.
1.512 -        real (distribution (\<lambda>x. (X x, Z x)) {(x,z)}) * log b (real (distribution (\<lambda>x. (X x, Z x)) {(x,z)}) / real (distribution Z {z})))"
1.513 -  (is "_ = ?rhs")
1.514 -proof -
1.515 -  have setsum_product:
1.516 -    "\<And>f x. (\<Sum>v\<in>(\<lambda>x. (Y x, Z x)) ` space M. real (joint_distribution X (\<lambda>x. (Y x, Z x)) {(x,v)}) * f v)
1.517 -      = (\<Sum>v\<in>Y ` space M \<times> Z ` space M. real (joint_distribution X (\<lambda>x. (Y x, Z x)) {(x,v)}) * f v)"
1.518 -  proof (safe intro!: setsum_mono_zero_cong_left imageI)
1.519 -    fix x y z f
1.520 -    assume *: "(Y y, Z z) \<notin> (\<lambda>x. (Y x, Z x)) ` space M" and "y \<in> space M" "z \<in> space M"
1.521 -    hence "(\<lambda>x. (X x, Y x, Z x)) -` {(x, Y y, Z z)} \<inter> space M = {}"
1.522 -    proof safe
1.523 -      fix x' assume x': "x' \<in> space M" and eq: "Y x' = Y y" "Z x' = Z z"
1.524 -      have "(Y y, Z z) \<in> (\<lambda>x. (Y x, Z x)) ` space M" using eq[symmetric] x' by auto
1.525 -      thus "x' \<in> {}" using * by auto
1.526 -    qed
1.527 -    thus "real (joint_distribution X (\<lambda>x. (Y x, Z x)) {(x, Y y, Z z)}) * f (Y y) (Z z) = 0"
1.528 -      unfolding distribution_def by simp
1.529 -  qed (simp add: finite_space)
1.530 -
1.531 -  thus ?thesis
1.532 -    unfolding conditional_mutual_information_def Let_def mutual_information_eq
1.533 -    by (subst mutual_information_eq_generic)
1.534 -       (auto simp: prod_measure_space_def sigma_prod_sets_finite finite_space sigma_def
1.535 -        finite_prob_space_of_images finite_product_prob_space_of_images
1.536 -        setsum_cartesian_product' setsum_product setsum_subtractf setsum_addf
1.537 -        setsum_left_distrib[symmetric] setsum_real_distribution
1.538 -      cong: setsum_cong)
1.539 -qed
1.540
1.541  lemma (in finite_information_space) conditional_mutual_information_eq:
1.542    "\<I>(X ; Y | Z) = (\<Sum>(x, y, z) \<in> X ` space M \<times> Y ` space M \<times> Z ` space M.
1.543               real (distribution (\<lambda>x. (X x, Y x, Z x)) {(x, y, z)}) *
1.544               log b (real (distribution (\<lambda>x. (X x, Y x, Z x)) {(x, y, z)}) /
1.545      (real (joint_distribution X Z {(x, z)}) * real (joint_distribution Y Z {(y,z)} / distribution Z {z}))))"
1.546 -  unfolding conditional_mutual_information_def Let_def mutual_information_eq
1.547 -  by (subst mutual_information_eq_generic)
1.548 +  by (subst conditional_mutual_information_generic_eq)
1.549       (auto simp add: prod_measure_space_def sigma_prod_sets_finite finite_space
1.550 -      finite_prob_space_of_images finite_product_prob_space_of_images sigma_def
1.551 +      finite_measure_space finite_product_prob_space_of_images sigma_def
1.552        setsum_cartesian_product' setsum_product setsum_subtractf setsum_addf
1.553        setsum_left_distrib[symmetric] setsum_real_distribution setsum_commute[where A="Y`space M"]
1.554        real_of_pinfreal_mult[symmetric]
1.555 @@ -623,22 +635,6 @@
1.556      by (simp add: setsum_cartesian_product' distribution_remove_const)
1.557  qed
1.558
1.559 -lemma (in finite_prob_space) distribution_finite:
1.560 -  "distribution X A \<noteq> \<omega>"
1.561 -  by (auto simp: sets_eq_Pow distribution_def intro!: measure_finite)
1.562 -
1.563 -lemma (in finite_prob_space) real_distribution_order:
1.564 -  shows "r \<le> real (joint_distribution X Y {(x, y)}) \<Longrightarrow> r \<le> real (distribution X {x})"
1.565 -  and "r \<le> real (joint_distribution X Y {(x, y)}) \<Longrightarrow> r \<le> real (distribution Y {y})"
1.566 -  and "r < real (joint_distribution X Y {(x, y)}) \<Longrightarrow> r < real (distribution X {x})"
1.567 -  and "r < real (joint_distribution X Y {(x, y)}) \<Longrightarrow> r < real (distribution Y {y})"
1.568 -  and "distribution X {x} = 0 \<Longrightarrow> real (joint_distribution X Y {(x, y)}) = 0"
1.569 -  and "distribution Y {y} = 0 \<Longrightarrow> real (joint_distribution X Y {(x, y)}) = 0"
1.570 -  using real_of_pinfreal_mono[OF distribution_finite joint_distribution_restriction_fst, of X Y "{(x, y)}"]
1.571 -  using real_of_pinfreal_mono[OF distribution_finite joint_distribution_restriction_snd, of X Y "{(x, y)}"]
1.572 -  using real_pinfreal_nonneg[of "joint_distribution X Y {(x, y)}"]
1.573 -  by auto
1.574 -
1.575  lemma (in finite_information_space) conditional_mutual_information_positive:
1.576    "0 \<le> \<I>(X ; Y | Z)"
1.577  proof -
1.578 @@ -682,6 +678,8 @@
1.579      by (simp add: real_of_pinfreal_mult[symmetric])
1.580  qed
1.581
1.582 +subsection {* Conditional Entropy *}
1.583 +
1.584  definition (in prob_space)
1.585    "conditional_entropy b S T X Y = conditional_mutual_information b S S T X X Y"
1.586
1.587 @@ -694,19 +692,69 @@
1.588  lemma (in finite_information_space) conditional_entropy_positive:
1.589    "0 \<le> \<H>(X | Y)" unfolding conditional_entropy_def using conditional_mutual_information_positive .
1.590
1.591 +lemma (in finite_information_space) conditional_entropy_generic_eq:
1.592 +  assumes MX: "finite_measure_space MX (distribution X)"
1.593 +  assumes MY: "finite_measure_space MZ (distribution Z)"
1.594 +  shows "conditional_entropy b MX MZ X Z =
1.595 +     - (\<Sum>(x, z)\<in>space MX \<times> space MZ.
1.596 +         real (joint_distribution X Z {(x, z)}) *
1.597 +         log b (real (joint_distribution X Z {(x, z)}) / real (distribution Z {z})))"
1.598 +  unfolding conditional_entropy_def using assms
1.599 +  apply (simp add: conditional_mutual_information_generic_eq
1.600 +                   setsum_cartesian_product' setsum_commute[of _ "space MZ"]
1.601 +                   setsum_negf[symmetric] setsum_subtractf[symmetric])
1.602 +proof (safe intro!: setsum_cong, simp)
1.603 +  fix z x assume "z \<in> space MZ" "x \<in> space MX"
1.604 +  let "?XXZ x'" = "real (joint_distribution X (\<lambda>x. (X x, Z x)) {(x, x', z)})"
1.605 +  let "?XZ x'" = "real (joint_distribution X Z {(x', z)})"
1.606 +  let "?X" = "real (distribution X {x})"
1.607 +  interpret MX: finite_measure_space MX "distribution X" by fact
1.608 +  have *: "\<And>A. A = {} \<Longrightarrow> prob A = 0" by simp
1.609 +  have XXZ: "\<And>x'. ?XXZ x' = (if x' = x then ?XZ x else 0)"
1.610 +    by (auto simp: distribution_def intro!: arg_cong[where f=prob] *)
1.611 +  have "(\<Sum>x'\<in>space MX. ?XXZ x' * log b (?XXZ x') - (?XXZ x' * log b ?X + ?XXZ x' * log b (?XZ x'))) =
1.612 +    (\<Sum>x'\<in>{x}. ?XZ x' * log b (?XZ x') - (?XZ x' * log b ?X + ?XZ x' * log b (?XZ x')))"
1.613 +    using `x \<in> space MX` MX.finite_space
1.614 +    by (safe intro!: setsum_mono_zero_cong_right)
1.615 +       (auto split: split_if_asm simp: XXZ)
1.616 +  then show "(\<Sum>x'\<in>space MX. ?XXZ x' * log b (?XXZ x') - (?XXZ x' * log b ?X + ?XXZ x' * log b (?XZ x'))) +
1.617 +      ?XZ x * log b ?X = 0" by simp
1.618 +qed
1.619 +
1.620  lemma (in finite_information_space) conditional_entropy_eq:
1.621    "\<H>(X | Z) =
1.622       - (\<Sum>(x, z)\<in>X ` space M \<times> Z ` space M.
1.623           real (joint_distribution X Z {(x, z)}) *
1.624           log b (real (joint_distribution X Z {(x, z)}) / real (distribution Z {z})))"
1.625 +  by (simp add: finite_measure_space conditional_entropy_generic_eq)
1.626 +
1.627 +lemma (in finite_information_space) conditional_entropy_eq_ce_with_hypothesis:
1.628 +  "\<H>(X | Y) =
1.629 +    -(\<Sum>y\<in>Y`space M. real (distribution Y {y}) *
1.630 +      (\<Sum>x\<in>X`space M. real (joint_distribution X Y {(x,y)}) / real (distribution Y {(y)}) *
1.631 +              log b (real (joint_distribution X Y {(x,y)}) / real (distribution Y {(y)}))))"
1.632 +  unfolding conditional_entropy_eq neg_equal_iff_equal
1.633 +  apply (simp add: setsum_commute[of _ "Y`space M"] setsum_cartesian_product' setsum_divide_distrib[symmetric])
1.634 +  apply (safe intro!: setsum_cong)
1.635 +  using real_distribution_order'[of Y _ X _]
1.636 +  by (auto simp add: setsum_subtractf[of _ _ "X`space M"])
1.637 +
1.638 +lemma (in finite_information_space) conditional_entropy_eq_cartesian_sum:
1.639 +  "\<H>(X | Y) = -(\<Sum>x\<in>X`space M. \<Sum>y\<in>Y`space M.
1.640 +    real (joint_distribution X Y {(x,y)}) *
1.641 +    log b (real (joint_distribution X Y {(x,y)}) / real (distribution Y {y})))"
1.642  proof -
1.643 -  have *: "\<And>x y z. (\<lambda>x. (X x, X x, Z x)) -` {(x, y, z)} = (if x = y then (\<lambda>x. (X x, Z x)) -` {(x, z)} else {})" by auto
1.644 -  show ?thesis
1.645 -    unfolding conditional_mutual_information_eq_sum
1.646 -      conditional_entropy_def distribution_def *
1.647 -    by (auto intro!: setsum_0')
1.648 +  { fix x assume "x\<notin>(\<lambda>x. (X x, Y x))`space M"
1.649 +    then have "(\<lambda>x. (X x, Y x)) -` {x} \<inter> space M = {}" by auto
1.650 +    then have "joint_distribution X Y {x} = 0"
1.651 +      unfolding distribution_def by auto }
1.652 +  then show ?thesis using finite_space
1.653 +    unfolding conditional_entropy_eq neg_equal_iff_equal setsum_cartesian_product
1.654 +    by (auto intro!: setsum_mono_zero_cong_left)
1.655  qed
1.656
1.657 +subsection {* Equalities *}
1.658 +
1.659  lemma (in finite_information_space) mutual_information_eq_entropy_conditional_entropy:
1.660    "\<I>(X ; Z) = \<H>(X) - \<H>(X | Z)"
1.661    unfolding mutual_information_eq entropy_eq conditional_entropy_eq
1.662 @@ -722,109 +770,15 @@
1.663    show ?thesis by auto
1.664  qed
1.665
1.666 -(* -------------Entropy of a RV with a certain event is zero---------------- *)
1.667 -
1.668 -lemma (in finite_information_space) finite_entropy_certainty_eq_0:
1.669 -  assumes "x \<in> X ` space M" and "distribution X {x} = 1"
1.670 -  shows "\<H>(X) = 0"
1.671 -proof -
1.672 -  interpret X: finite_prob_space "\<lparr> space = X ` space M, sets = Pow (X ` space M) \<rparr>" "distribution X"
1.673 -    by (rule finite_prob_space_of_images)
1.674 -
1.675 -  have "distribution X (X ` space M - {x}) = distribution X (X ` space M) - distribution X {x}"
1.676 -    using X.measure_compl[of "{x}"] assms by auto
1.677 -  also have "\<dots> = 0" using X.prob_space assms by auto
1.678 -  finally have X0: "distribution X (X ` space M - {x}) = 0" by auto
1.679 -
1.680 -  { fix y assume asm: "y \<noteq> x" "y \<in> X ` space M"
1.681 -    hence "{y} \<subseteq> X ` space M - {x}" by auto
1.682 -    from X.measure_mono[OF this] X0 asm
1.683 -    have "distribution X {y} = 0" by auto }
1.684 -
1.685 -  hence fi: "\<And> y. y \<in> X ` space M \<Longrightarrow> real (distribution X {y}) = (if x = y then 1 else 0)"
1.686 -    using assms by auto
1.687 -
1.688 -  have y: "\<And>y. (if x = y then 1 else 0) * log b (if x = y then 1 else 0) = 0" by simp
1.689 -
1.690 -  show ?thesis unfolding entropy_eq by (auto simp: y fi)
1.691 -qed
1.692 -(* --------------- upper bound on entropy for a rv ------------------------- *)
1.693 -
1.694 -lemma (in finite_prob_space) distribution_1:
1.695 -  "distribution X A \<le> 1"
1.696 -  unfolding distribution_def measure_space_1[symmetric]
1.697 -  by (auto intro!: measure_mono simp: sets_eq_Pow)
1.698 -
1.699 -lemma (in finite_prob_space) real_distribution_1:
1.700 -  "real (distribution X A) \<le> 1"
1.701 -  unfolding real_pinfreal_1[symmetric]
1.702 -  by (rule real_of_pinfreal_mono[OF _ distribution_1]) simp
1.703 +lemma (in finite_information_space) entropy_chain_rule:
1.704 +  "\<H>(\<lambda>x. (X x, Y x)) = \<H>(X) + \<H>(Y|X)"
1.705 +  unfolding entropy_eq[of X] entropy_eq_cartesian_sum conditional_entropy_eq_cartesian_sum
1.706 +  unfolding setsum_commute[of _ "X`space M"] setsum_negf[symmetric] setsum_addf[symmetric]
1.707 +  by (rule setsum_cong)
1.708 +     (simp_all add: setsum_negf setsum_addf setsum_subtractf setsum_real_distribution
1.709 +                    setsum_left_distrib[symmetric] joint_distribution_commute[of X Y])
1.710
1.711 -lemma (in finite_information_space) finite_entropy_le_card:
1.712 -  "\<H>(X) \<le> log b (real (card (X ` space M \<inter> {x . distribution X {x} \<noteq> 0})))"
1.713 -proof -
1.714 -  let "?d x" = "distribution X {x}"
1.715 -  let "?p x" = "real (?d x)"
1.716 -  have "\<H>(X) = (\<Sum>x\<in>X`space M. ?p x * log b (1 / ?p x))"
1.717 -    by (auto intro!: setsum_cong simp: entropy_eq setsum_negf[symmetric])
1.718 -  also have "\<dots> \<le> log b (\<Sum>x\<in>X`space M. ?p x * (1 / ?p x))"
1.719 -    apply (rule log_setsum')
1.720 -    using not_empty b_gt_1 finite_space sum_over_space_real_distribution
1.721 -    by auto
1.722 -  also have "\<dots> = log b (\<Sum>x\<in>X`space M. if ?d x \<noteq> 0 then 1 else 0)"
1.723 -    apply (rule arg_cong[where f="\<lambda>f. log b (\<Sum>x\<in>X`space M. f x)"])
1.724 -    using distribution_finite[of X] by (auto simp: expand_fun_eq real_of_pinfreal_eq_0)
1.725 -  finally show ?thesis
1.726 -    using finite_space by (auto simp: setsum_cases real_eq_of_nat)
1.727 -qed
1.728 -
1.729 -(* --------------- entropy is maximal for a uniform rv --------------------- *)
1.730 -
1.731 -lemma (in finite_prob_space) uniform_prob:
1.732 -  assumes "x \<in> space M"
1.733 -  assumes "\<And> x y. \<lbrakk>x \<in> space M ; y \<in> space M\<rbrakk> \<Longrightarrow> prob {x} = prob {y}"
1.734 -  shows "prob {x} = 1 / real (card (space M))"
1.735 -proof -
1.736 -  have prob_x: "\<And> y. y \<in> space M \<Longrightarrow> prob {y} = prob {x}"
1.737 -    using assms(2)[OF _ `x \<in> space M`] by blast
1.738 -  have "1 = prob (space M)"
1.739 -    using prob_space by auto
1.740 -  also have "\<dots> = (\<Sum> x \<in> space M. prob {x})"
1.741 -    using real_finite_measure_finite_Union[of "space M" "\<lambda> x. {x}", simplified]
1.742 -      sets_eq_Pow inj_singleton[unfolded inj_on_def, rule_format]
1.743 -      finite_space unfolding disjoint_family_on_def  prob_space[symmetric]
1.744 -    by (auto simp add:setsum_restrict_set)
1.745 -  also have "\<dots> = (\<Sum> y \<in> space M. prob {x})"
1.746 -    using prob_x by auto
1.747 -  also have "\<dots> = real_of_nat (card (space M)) * prob {x}" by simp
1.748 -  finally have one: "1 = real (card (space M)) * prob {x}"
1.749 -    using real_eq_of_nat by auto
1.750 -  hence two: "real (card (space M)) \<noteq> 0" by fastsimp
1.751 -  from one have three: "prob {x} \<noteq> 0" by fastsimp
1.752 -  thus ?thesis using one two three divide_cancel_right
1.753 -    by (auto simp:field_simps)
1.754 -qed
1.755 -
1.756 -lemma (in finite_information_space) finite_entropy_uniform_max:
1.757 -  assumes "\<And>x y. \<lbrakk> x \<in> X ` space M ; y \<in> X ` space M \<rbrakk> \<Longrightarrow> distribution X {x} = distribution X {y}"
1.758 -  shows "\<H>(X) = log b (real (card (X ` space M)))"
1.759 -proof -
1.760 -  note uniform =
1.761 -    finite_prob_space_of_images[of X, THEN finite_prob_space.uniform_prob, simplified]
1.762 -
1.763 -  have card_gt0: "0 < card (X ` space M)" unfolding card_gt_0_iff
1.764 -    using finite_space not_empty by auto
1.765 -
1.766 -  { fix x assume "x \<in> X ` space M"
1.767 -    hence "real (distribution X {x}) = 1 / real (card (X ` space M))"
1.768 -    proof (rule uniform)
1.769 -      fix x y assume "x \<in> X`space M" "y \<in> X`space M"
1.770 -      from assms[OF this] show "real (distribution X {x}) = real (distribution X {y})" by simp
1.771 -    qed }
1.772 -  thus ?thesis
1.773 -    using not_empty finite_space b_gt_1 card_gt0
1.774 -    by (simp add: entropy_eq real_eq_of_nat[symmetric] log_divide)
1.775 -qed
1.776 +section {* Partitioning *}
1.777
1.778  definition "subvimage A f g \<longleftrightarrow> (\<forall>x \<in> A. f -` {f x} \<inter> A \<subseteq> g -` {g x} \<inter> A)"
1.779
1.780 @@ -976,38 +930,6 @@
1.781    "\<H>(f \<circ> X) \<le> \<H>(X)"
1.782    by (subst (2) entropy_partition[of _ "f \<circ> X"]) (auto intro: conditional_entropy_positive)
1.783
1.784 -lemma (in prob_space) distribution_cong:
1.785 -  assumes "\<And>x. x \<in> space M \<Longrightarrow> X x = Y x"
1.786 -  shows "distribution X = distribution Y"
1.787 -  unfolding distribution_def expand_fun_eq
1.788 -  using assms by (auto intro!: arg_cong[where f="\<mu>"])
1.789 -
1.790 -lemma (in prob_space) joint_distribution_cong:
1.791 -  assumes "\<And>x. x \<in> space M \<Longrightarrow> X x = X' x"
1.792 -  assumes "\<And>x. x \<in> space M \<Longrightarrow> Y x = Y' x"
1.793 -  shows "joint_distribution X Y = joint_distribution X' Y'"
1.794 -  unfolding distribution_def expand_fun_eq
1.795 -  using assms by (auto intro!: arg_cong[where f="\<mu>"])
1.796 -
1.797 -lemma image_cong:
1.798 -  "\<lbrakk> \<And>x. x \<in> S \<Longrightarrow> X x = X' x \<rbrakk> \<Longrightarrow> X ` S = X' ` S"
1.799 -  by (auto intro!: image_eqI)
1.800 -
1.801 -lemma (in finite_information_space) mutual_information_cong:
1.802 -  assumes X: "\<And>x. x \<in> space M \<Longrightarrow> X x = X' x"
1.803 -  assumes Y: "\<And>x. x \<in> space M \<Longrightarrow> Y x = Y' x"
1.804 -  shows "\<I>(X ; Y) = \<I>(X' ; Y')"
1.805 -proof -
1.806 -  have "X ` space M = X' ` space M" using X by (rule image_cong)
1.807 -  moreover have "Y ` space M = Y' ` space M" using Y by (rule image_cong)
1.808 -  ultimately show ?thesis
1.809 -  unfolding mutual_information_eq
1.810 -    using
1.811 -      assms[THEN distribution_cong]
1.812 -      joint_distribution_cong[OF assms]
1.813 -    by (auto intro!: setsum_cong)
1.814 -qed
1.815 -
1.816  corollary (in finite_information_space) entropy_of_inj:
1.817    assumes "inj_on f (X`space M)"
1.818    shows "\<H>(f \<circ> X) = \<H>(X)"
```
```     2.1 --- a/src/HOL/Probability/Probability_Space.thy	Thu Sep 02 17:28:00 2010 +0200
2.2 +++ b/src/HOL/Probability/Probability_Space.thy	Thu Sep 02 19:51:53 2010 +0200
2.3 @@ -2,8 +2,6 @@
2.4  imports Lebesgue_Integration Radon_Nikodym
2.5  begin
2.6
2.7 -
2.8 -
2.9  locale prob_space = measure_space +
2.10    assumes measure_space_1: "\<mu> (space M) = 1"
2.11
2.12 @@ -33,6 +31,19 @@
2.13  abbreviation
2.14    "joint_distribution X Y \<equiv> distribution (\<lambda>x. (X x, Y x))"
2.15
2.16 +lemma (in prob_space) distribution_cong:
2.17 +  assumes "\<And>x. x \<in> space M \<Longrightarrow> X x = Y x"
2.18 +  shows "distribution X = distribution Y"
2.19 +  unfolding distribution_def expand_fun_eq
2.20 +  using assms by (auto intro!: arg_cong[where f="\<mu>"])
2.21 +
2.22 +lemma (in prob_space) joint_distribution_cong:
2.23 +  assumes "\<And>x. x \<in> space M \<Longrightarrow> X x = X' x"
2.24 +  assumes "\<And>x. x \<in> space M \<Longrightarrow> Y x = Y' x"
2.25 +  shows "joint_distribution X Y = joint_distribution X' Y'"
2.26 +  unfolding distribution_def expand_fun_eq
2.27 +  using assms by (auto intro!: arg_cong[where f="\<mu>"])
2.28 +
2.29  lemma prob_space: "prob (space M) = 1"
2.30    unfolding measure_space_1 by simp
2.31
2.32 @@ -327,18 +338,158 @@
2.33      joint_distribution_restriction_snd[of X Y "{(x, y)}"]
2.34    by auto
2.35
2.36 -lemma (in finite_prob_space) finite_prob_space_of_images:
2.37 -  "finite_prob_space \<lparr> space = X ` space M, sets = Pow (X ` space M)\<rparr> (distribution X)"
2.38 -  by (simp add: finite_prob_space_eq finite_measure_space)
2.39 +lemma (in finite_prob_space) distribution_mono:
2.40 +  assumes "\<And>t. \<lbrakk> t \<in> space M ; X t \<in> x \<rbrakk> \<Longrightarrow> Y t \<in> y"
2.41 +  shows "distribution X x \<le> distribution Y y"
2.42 +  unfolding distribution_def
2.43 +  using assms by (auto simp: sets_eq_Pow intro!: measure_mono)
2.44 +
2.45 +lemma (in finite_prob_space) distribution_mono_gt_0:
2.46 +  assumes gt_0: "0 < distribution X x"
2.47 +  assumes *: "\<And>t. \<lbrakk> t \<in> space M ; X t \<in> x \<rbrakk> \<Longrightarrow> Y t \<in> y"
2.48 +  shows "0 < distribution Y y"
2.49 +  by (rule less_le_trans[OF gt_0 distribution_mono]) (rule *)
2.50 +
2.51 +lemma (in finite_prob_space) sum_over_space_distrib:
2.52 +  "(\<Sum>x\<in>X`space M. distribution X {x}) = 1"
2.53 +  unfolding distribution_def measure_space_1[symmetric] using finite_space
2.54 +  by (subst measure_finitely_additive'')
2.55 +     (auto simp add: disjoint_family_on_def sets_eq_Pow intro!: arg_cong[where f=\<mu>])
2.56 +
2.57 +lemma (in finite_prob_space) sum_over_space_real_distribution:
2.58 +  "(\<Sum>x\<in>X`space M. real (distribution X {x})) = 1"
2.59 +  unfolding distribution_def prob_space[symmetric] using finite_space
2.60 +  by (subst real_finite_measure_finite_Union[symmetric])
2.61 +     (auto simp add: disjoint_family_on_def sets_eq_Pow intro!: arg_cong[where f=prob])
2.62 +
2.63 +lemma (in finite_prob_space) finite_sum_over_space_eq_1:
2.64 +  "(\<Sum>x\<in>space M. real (\<mu> {x})) = 1"
2.65 +  using sum_over_space_eq_1 finite_measure by (simp add: real_of_pinfreal_setsum sets_eq_Pow)
2.66 +
2.67 +lemma (in finite_prob_space) distribution_finite:
2.68 +  "distribution X A \<noteq> \<omega>"
2.69 +  using finite_measure[of "X -` A \<inter> space M"]
2.70 +  unfolding distribution_def sets_eq_Pow by auto
2.71 +
2.72 +lemma (in finite_prob_space) real_distribution_gt_0[simp]:
2.73 +  "0 < real (distribution Y y) \<longleftrightarrow>  0 < distribution Y y"
2.74 +  using assms by (auto intro!: real_pinfreal_pos distribution_finite)
2.75 +
2.76 +lemma (in finite_prob_space) real_distribution_mult_pos_pos:
2.77 +  assumes "0 < distribution Y y"
2.78 +  and "0 < distribution X x"
2.79 +  shows "0 < real (distribution Y y * distribution X x)"
2.80 +  unfolding real_of_pinfreal_mult[symmetric]
2.81 +  using assms by (auto intro!: mult_pos_pos)
2.82 +
2.83 +lemma (in finite_prob_space) real_distribution_divide_pos_pos:
2.84 +  assumes "0 < distribution Y y"
2.85 +  and "0 < distribution X x"
2.86 +  shows "0 < real (distribution Y y / distribution X x)"
2.87 +  unfolding divide_pinfreal_def real_of_pinfreal_mult[symmetric]
2.88 +  using assms distribution_finite[of X x] by (cases "distribution X x") (auto intro!: mult_pos_pos)
2.89 +
2.90 +lemma (in finite_prob_space) real_distribution_mult_inverse_pos_pos:
2.91 +  assumes "0 < distribution Y y"
2.92 +  and "0 < distribution X x"
2.93 +  shows "0 < real (distribution Y y * inverse (distribution X x))"
2.94 +  unfolding divide_pinfreal_def real_of_pinfreal_mult[symmetric]
2.95 +  using assms distribution_finite[of X x] by (cases "distribution X x") (auto intro!: mult_pos_pos)
2.96 +
2.97 +lemma (in prob_space) distribution_remove_const:
2.98 +  shows "joint_distribution X (\<lambda>x. ()) {(x, ())} = distribution X {x}"
2.99 +  and "joint_distribution (\<lambda>x. ()) X {((), x)} = distribution X {x}"
2.100 +  and "joint_distribution X (\<lambda>x. (Y x, ())) {(x, y, ())} = joint_distribution X Y {(x, y)}"
2.101 +  and "joint_distribution X (\<lambda>x. ((), Y x)) {(x, (), y)} = joint_distribution X Y {(x, y)}"
2.102 +  and "distribution (\<lambda>x. ()) {()} = 1"
2.103 +  unfolding measure_space_1[symmetric]
2.104 +  by (auto intro!: arg_cong[where f="\<mu>"] simp: distribution_def)
2.105
2.106 -lemma (in finite_prob_space) finite_product_prob_space_of_images:
2.107 -  "finite_prob_space \<lparr> space = X ` space M \<times> Y ` space M, sets = Pow (X ` space M \<times> Y ` space M)\<rparr>
2.108 -                     (joint_distribution X Y)"
2.109 -  (is "finite_prob_space ?S _")
2.110 -proof (simp add: finite_prob_space_eq finite_product_measure_space_of_images)
2.111 -  have "X -` X ` space M \<inter> Y -` Y ` space M \<inter> space M = space M" by auto
2.112 -  thus "joint_distribution X Y (X ` space M \<times> Y ` space M) = 1"
2.113 -    by (simp add: distribution_def prob_space vimage_Times comp_def measure_space_1)
2.114 +lemma (in finite_prob_space) setsum_distribution_gen:
2.115 +  assumes "Z -` {c} \<inter> space M = (\<Union>x \<in> X`space M. Y -` {f x}) \<inter> space M"
2.116 +  and "inj_on f (X`space M)"
2.117 +  shows "(\<Sum>x \<in> X`space M. distribution Y {f x}) = distribution Z {c}"
2.118 +  unfolding distribution_def assms
2.119 +  using finite_space assms
2.120 +  by (subst measure_finitely_additive'')
2.121 +     (auto simp add: disjoint_family_on_def sets_eq_Pow inj_on_def
2.122 +      intro!: arg_cong[where f=prob])
2.123 +
2.124 +lemma (in finite_prob_space) setsum_distribution:
2.125 +  "(\<Sum>x \<in> X`space M. joint_distribution X Y {(x, y)}) = distribution Y {y}"
2.126 +  "(\<Sum>y \<in> Y`space M. joint_distribution X Y {(x, y)}) = distribution X {x}"
2.127 +  "(\<Sum>x \<in> X`space M. joint_distribution X (\<lambda>x. (Y x, Z x)) {(x, y, z)}) = joint_distribution Y Z {(y, z)}"
2.128 +  "(\<Sum>y \<in> Y`space M. joint_distribution X (\<lambda>x. (Y x, Z x)) {(x, y, z)}) = joint_distribution X Z {(x, z)}"
2.129 +  "(\<Sum>z \<in> Z`space M. joint_distribution X (\<lambda>x. (Y x, Z x)) {(x, y, z)}) = joint_distribution X Y {(x, y)}"
2.130 +  by (auto intro!: inj_onI setsum_distribution_gen)
2.131 +
2.132 +lemma (in finite_prob_space) setsum_real_distribution_gen:
2.133 +  assumes "Z -` {c} \<inter> space M = (\<Union>x \<in> X`space M. Y -` {f x}) \<inter> space M"
2.134 +  and "inj_on f (X`space M)"
2.135 +  shows "(\<Sum>x \<in> X`space M. real (distribution Y {f x})) = real (distribution Z {c})"
2.136 +  unfolding distribution_def assms
2.137 +  using finite_space assms
2.138 +  by (subst real_finite_measure_finite_Union[symmetric])
2.139 +     (auto simp add: disjoint_family_on_def sets_eq_Pow inj_on_def
2.140 +        intro!: arg_cong[where f=prob])
2.141 +
2.142 +lemma (in finite_prob_space) setsum_real_distribution:
2.143 +  "(\<Sum>x \<in> X`space M. real (joint_distribution X Y {(x, y)})) = real (distribution Y {y})"
2.144 +  "(\<Sum>y \<in> Y`space M. real (joint_distribution X Y {(x, y)})) = real (distribution X {x})"
2.145 +  "(\<Sum>x \<in> X`space M. real (joint_distribution X (\<lambda>x. (Y x, Z x)) {(x, y, z)})) = real (joint_distribution Y Z {(y, z)})"
2.146 +  "(\<Sum>y \<in> Y`space M. real (joint_distribution X (\<lambda>x. (Y x, Z x)) {(x, y, z)})) = real (joint_distribution X Z {(x, z)})"
2.147 +  "(\<Sum>z \<in> Z`space M. real (joint_distribution X (\<lambda>x. (Y x, Z x)) {(x, y, z)})) = real (joint_distribution X Y {(x, y)})"
2.148 +  by (auto intro!: inj_onI setsum_real_distribution_gen)
2.149 +
2.150 +lemma (in finite_prob_space) real_distribution_order:
2.151 +  shows "r \<le> real (joint_distribution X Y {(x, y)}) \<Longrightarrow> r \<le> real (distribution X {x})"
2.152 +  and "r \<le> real (joint_distribution X Y {(x, y)}) \<Longrightarrow> r \<le> real (distribution Y {y})"
2.153 +  and "r < real (joint_distribution X Y {(x, y)}) \<Longrightarrow> r < real (distribution X {x})"
2.154 +  and "r < real (joint_distribution X Y {(x, y)}) \<Longrightarrow> r < real (distribution Y {y})"
2.155 +  and "distribution X {x} = 0 \<Longrightarrow> real (joint_distribution X Y {(x, y)}) = 0"
2.156 +  and "distribution Y {y} = 0 \<Longrightarrow> real (joint_distribution X Y {(x, y)}) = 0"
2.157 +  using real_of_pinfreal_mono[OF distribution_finite joint_distribution_restriction_fst, of X Y "{(x, y)}"]
2.158 +  using real_of_pinfreal_mono[OF distribution_finite joint_distribution_restriction_snd, of X Y "{(x, y)}"]
2.159 +  using real_pinfreal_nonneg[of "joint_distribution X Y {(x, y)}"]
2.160 +  by auto
2.161 +
2.162 +lemma (in prob_space) joint_distribution_remove[simp]:
2.163 +    "joint_distribution X X {(x, x)} = distribution X {x}"
2.164 +  unfolding distribution_def by (auto intro!: arg_cong[where f="\<mu>"])
2.165 +
2.166 +lemma (in finite_prob_space) distribution_1:
2.167 +  "distribution X A \<le> 1"
2.168 +  unfolding distribution_def measure_space_1[symmetric]
2.169 +  by (auto intro!: measure_mono simp: sets_eq_Pow)
2.170 +
2.171 +lemma (in finite_prob_space) real_distribution_1:
2.172 +  "real (distribution X A) \<le> 1"
2.173 +  unfolding real_pinfreal_1[symmetric]
2.174 +  by (rule real_of_pinfreal_mono[OF _ distribution_1]) simp
2.175 +
2.176 +lemma (in finite_prob_space) uniform_prob:
2.177 +  assumes "x \<in> space M"
2.178 +  assumes "\<And> x y. \<lbrakk>x \<in> space M ; y \<in> space M\<rbrakk> \<Longrightarrow> prob {x} = prob {y}"
2.179 +  shows "prob {x} = 1 / real (card (space M))"
2.180 +proof -
2.181 +  have prob_x: "\<And> y. y \<in> space M \<Longrightarrow> prob {y} = prob {x}"
2.182 +    using assms(2)[OF _ `x \<in> space M`] by blast
2.183 +  have "1 = prob (space M)"
2.184 +    using prob_space by auto
2.185 +  also have "\<dots> = (\<Sum> x \<in> space M. prob {x})"
2.186 +    using real_finite_measure_finite_Union[of "space M" "\<lambda> x. {x}", simplified]
2.187 +      sets_eq_Pow inj_singleton[unfolded inj_on_def, rule_format]
2.188 +      finite_space unfolding disjoint_family_on_def  prob_space[symmetric]
2.189 +    by (auto simp add:setsum_restrict_set)
2.190 +  also have "\<dots> = (\<Sum> y \<in> space M. prob {x})"
2.191 +    using prob_x by auto
2.192 +  also have "\<dots> = real_of_nat (card (space M)) * prob {x}" by simp
2.193 +  finally have one: "1 = real (card (space M)) * prob {x}"
2.194 +    using real_eq_of_nat by auto
2.195 +  hence two: "real (card (space M)) \<noteq> 0" by fastsimp
2.196 +  from one have three: "prob {x} \<noteq> 0" by fastsimp
2.197 +  thus ?thesis using one two three divide_cancel_right
2.198 +    by (auto simp:field_simps)
2.199  qed
2.200
2.201  lemma (in prob_space) prob_space_subalgebra:
2.202 @@ -382,70 +533,54 @@
2.203  qed
2.204
2.205  lemma (in finite_prob_space) finite_measure_space:
2.206 +  fixes X :: "'a \<Rightarrow> 'x"
2.207    shows "finite_measure_space \<lparr>space = X ` space M, sets = Pow (X ` space M)\<rparr> (distribution X)"
2.208      (is "finite_measure_space ?S _")
2.209  proof (rule finite_measure_spaceI, simp_all)
2.210    show "finite (X ` space M)" using finite_space by simp
2.211 -
2.212 -  show "positive (distribution X)"
2.213 -    unfolding distribution_def positive_def using sets_eq_Pow by auto
2.214 -
2.215 -  show "additive ?S (distribution X)" unfolding additive_def distribution_def
2.216 -  proof (simp, safe)
2.217 -    fix x y
2.218 -    have x: "(X -` x) \<inter> space M \<in> sets M"
2.219 -      and y: "(X -` y) \<inter> space M \<in> sets M" using sets_eq_Pow by auto
2.220 -    assume "x \<inter> y = {}"
2.221 -    hence "X -` x \<inter> space M \<inter> (X -` y \<inter> space M) = {}" by auto
2.222 -    from additive[unfolded additive_def, rule_format, OF x y] this
2.223 -      finite_measure[OF x] finite_measure[OF y]
2.224 -    have "\<mu> (((X -` x) \<union> (X -` y)) \<inter> space M) =
2.225 -      \<mu> ((X -` x) \<inter> space M) + \<mu> ((X -` y) \<inter> space M)"
2.226 -      by (subst Int_Un_distrib2) auto
2.227 -    thus "\<mu> ((X -` x \<union> X -` y) \<inter> space M) = \<mu> (X -` x \<inter> space M) + \<mu> (X -` y \<inter> space M)"
2.228 -      by auto
2.229 -  qed
2.230 -
2.231 -  { fix x assume "x \<in> X ` space M" thus "distribution X {x} \<noteq> \<omega>"
2.232 -    unfolding distribution_def by (auto intro!: finite_measure simp: sets_eq_Pow) }
2.233 +next
2.234 +  fix A B :: "'x set" assume "A \<inter> B = {}"
2.235 +  then show "distribution X (A \<union> B) = distribution X A + distribution X B"
2.236 +    unfolding distribution_def
2.237 +    by (subst measure_additive)
2.238 +       (auto intro!: arg_cong[where f=\<mu>] simp: sets_eq_Pow)
2.239  qed
2.240
2.241 +lemma (in finite_prob_space) finite_prob_space_of_images:
2.242 +  "finite_prob_space \<lparr> space = X ` space M, sets = Pow (X ` space M)\<rparr> (distribution X)"
2.243 +  by (simp add: finite_prob_space_eq finite_measure_space)
2.244 +
2.245 +lemma (in prob_space) joint_distribution_commute:
2.246 +  "joint_distribution X Y x = joint_distribution Y X ((\<lambda>(x,y). (y,x))`x)"
2.247 +  unfolding distribution_def by (auto intro!: arg_cong[where f=\<mu>])
2.248 +
2.249 +lemma (in finite_prob_space) real_distribution_order':
2.250 +  shows "real (distribution X {x}) = 0 \<Longrightarrow> real (joint_distribution X Y {(x, y)}) = 0"
2.251 +  and "real (distribution Y {y}) = 0 \<Longrightarrow> real (joint_distribution X Y {(x, y)}) = 0"
2.252 +  using real_of_pinfreal_mono[OF distribution_finite joint_distribution_restriction_fst, of X Y "{(x, y)}"]
2.253 +  using real_of_pinfreal_mono[OF distribution_finite joint_distribution_restriction_snd, of X Y "{(x, y)}"]
2.254 +  using real_pinfreal_nonneg[of "joint_distribution X Y {(x, y)}"]
2.255 +  by auto
2.256 +
2.257  lemma (in finite_prob_space) finite_product_measure_space:
2.258 +  fixes X :: "'a \<Rightarrow> 'x" and Y :: "'a \<Rightarrow> 'y"
2.259    assumes "finite s1" "finite s2"
2.260    shows "finite_measure_space \<lparr> space = s1 \<times> s2, sets = Pow (s1 \<times> s2)\<rparr> (joint_distribution X Y)"
2.261      (is "finite_measure_space ?M ?D")
2.262 -proof (rule finite_Pow_additivity_sufficient)
2.263 -  show "positive ?D"
2.264 -    unfolding positive_def using assms sets_eq_Pow
2.265 -    by (simp add: distribution_def)
2.266 -
2.267 -  show "additive ?M ?D" unfolding additive_def
2.268 -  proof safe
2.269 -    fix x y
2.270 -    have A: "((\<lambda>x. (X x, Y x)) -` x) \<inter> space M \<in> sets M" using assms sets_eq_Pow by auto
2.271 -    have B: "((\<lambda>x. (X x, Y x)) -` y) \<inter> space M \<in> sets M" using assms sets_eq_Pow by auto
2.272 -    assume "x \<inter> y = {}"
2.273 -    hence "(\<lambda>x. (X x, Y x)) -` x \<inter> space M \<inter> ((\<lambda>x. (X x, Y x)) -` y \<inter> space M) = {}"
2.274 -      by auto
2.275 -    from additive[unfolded additive_def, rule_format, OF A B] this
2.276 -      finite_measure[OF A] finite_measure[OF B]
2.277 -    show "?D (x \<union> y) = ?D x + ?D y"
2.278 -      apply (simp add: distribution_def)
2.279 -      apply (subst Int_Un_distrib2)
2.280 -      by (auto simp: real_of_pinfreal_add)
2.281 -  qed
2.282 -
2.283 -  show "finite (space ?M)"
2.284 +proof (rule finite_measure_spaceI, simp_all)
2.285 +  show "finite (s1 \<times> s2)"
2.286      using assms by auto
2.287 -
2.288 -  show "sets ?M = Pow (space ?M)"
2.289 -    by simp
2.290 -
2.291 -  { fix x assume "x \<in> space ?M" thus "?D {x} \<noteq> \<omega>"
2.292 -    unfolding distribution_def by (auto intro!: finite_measure simp: sets_eq_Pow) }
2.293 +  show "joint_distribution X Y (s1\<times>s2) \<noteq> \<omega>"
2.294 +    using distribution_finite .
2.295 +next
2.296 +  fix A B :: "('x*'y) set" assume "A \<inter> B = {}"
2.297 +  then show "joint_distribution X Y (A \<union> B) = joint_distribution X Y A + joint_distribution X Y B"
2.298 +    unfolding distribution_def
2.299 +    by (subst measure_additive)
2.300 +       (auto intro!: arg_cong[where f=\<mu>] simp: sets_eq_Pow)
2.301  qed
2.302
2.303 -lemma (in finite_measure_space) finite_product_measure_space_of_images:
2.304 +lemma (in finite_prob_space) finite_product_measure_space_of_images:
2.305    shows "finite_measure_space \<lparr> space = X ` space M \<times> Y ` space M,
2.306                                  sets = Pow (X ` space M \<times> Y ` space M) \<rparr>
2.307                                (joint_distribution X Y)"
```
```     3.1 --- a/src/HOL/Probability/Product_Measure.thy	Thu Sep 02 17:28:00 2010 +0200
3.2 +++ b/src/HOL/Probability/Product_Measure.thy	Thu Sep 02 19:51:53 2010 +0200
3.3 @@ -315,7 +315,7 @@
3.4    from subst[OF this, of "\<lambda> M. A \<in> sets M", OF A]
3.5    show ?thesis by auto
3.6  qed
3.7 -
3.8 +(*
3.9  lemma
3.10    assumes sfin: "range A \<subseteq> sets M" "(\<Union>i. A i) = space M" "\<And> i :: nat. \<nu> (A i) < \<omega>"
3.11    assumes A: "\<And>  i. \<mu> (A i) = \<nu> (A i)" "\<And> i. A i \<subseteq> A (Suc i)"
3.12 @@ -345,7 +345,7 @@
3.13        apply (auto simp add:image_def) (* TODO *) sorry
3.14      show ?thesis sorry
3.15  qed
3.16 -
3.17 +*)
3.18  definition prod_sets where
3.19    "prod_sets A B = {z. \<exists>x \<in> A. \<exists>y \<in> B. z = x \<times> y}"
3.20
3.21 @@ -512,36 +512,25 @@
3.22  qed
3.23
3.24  lemma (in finite_measure_space) finite_measure_space_finite_prod_measure:
3.25 -  assumes "finite_measure_space N \<nu>"
3.26 +  fixes N :: "('c, 'd) algebra_scheme"
3.27 +  assumes N: "finite_measure_space N \<nu>"
3.28    shows "finite_measure_space (prod_measure_space M N) (prod_measure M \<mu> N \<nu>)"
3.29    unfolding finite_prod_measure_space[OF assms]
3.30 -proof (rule finite_measure_spaceI)
3.31 +proof (rule finite_measure_spaceI, simp_all)
3.32    interpret N: finite_measure_space N \<nu> by fact
3.33 -
3.34 -  let ?P = "\<lparr>space = space M \<times> space N, sets = Pow (space M \<times> space N)\<rparr>"
3.35 -  show "measure_space ?P (prod_measure M \<mu> N \<nu>)"
3.36 -  proof (rule sigma_algebra.finite_additivity_sufficient)
3.37 -    show "sigma_algebra ?P" by (rule sigma_algebra_Pow)
3.38 -    show "finite (space ?P)" using finite_space N.finite_space by auto
3.39 -    from finite_prod_measure_times[OF assms, of "{}" "{}"]
3.40 -    show "positive (prod_measure M \<mu> N \<nu>)"
3.41 -      unfolding positive_def by simp
3.42 +  show "finite (space M \<times> space N)" using finite_space N.finite_space by auto
3.43 +  show "prod_measure M \<mu> N \<nu> (space M \<times> space N) \<noteq> \<omega>"
3.44 +    using finite_prod_measure_times[OF N top N.top] by simp
3.45 +  show "prod_measure M \<mu> N \<nu> {} = 0"
3.46 +    using finite_prod_measure_times[OF N empty_sets N.empty_sets] by simp
3.47
3.48 -    show "additive ?P (prod_measure M \<mu> N \<nu>)"
3.49 -      unfolding additive_def
3.50 -      apply (auto simp add: sets_eq_Pow prod_measure_def positive_integral_add[symmetric]
3.51 -                  intro!: positive_integral_cong)
3.52 -      apply (subst N.measure_additive[symmetric])
3.53 -      by (auto simp: N.sets_eq_Pow sets_eq_Pow)
3.54 -  qed
3.55 -  show "finite (space ?P)" using finite_space N.finite_space by auto
3.56 -  show "sets ?P = Pow (space ?P)" by simp
3.57 -
3.58 -  fix x assume "x \<in> space ?P"
3.59 -  with finite_prod_measure_times[OF assms, of "{fst x}" "{snd x}"]
3.60 -    finite_measure[of "{fst x}"] N.finite_measure[of "{snd x}"]
3.61 -  show "prod_measure M \<mu> N \<nu> {x} \<noteq> \<omega>"
3.62 -    by (auto simp add: sets_eq_Pow N.sets_eq_Pow elim!: SigmaE)
3.63 +  fix A B :: "('a * 'c) set" assume "A \<inter> B = {}" "A \<subseteq> space M \<times> space N" "B \<subseteq> space M \<times> space N"
3.64 +  then show "prod_measure M \<mu> N \<nu> (A \<union> B) = prod_measure M \<mu> N \<nu> A + prod_measure M \<mu> N \<nu> B"
3.65 +    apply (auto simp add: sets_eq_Pow prod_measure_def positive_integral_add[symmetric]
3.66 +                intro!: positive_integral_cong)
3.67 +    apply (subst N.measure_additive)
3.68 +    apply (auto intro!: arg_cong[where f=\<mu>] simp: N.sets_eq_Pow sets_eq_Pow)
3.69 +    done
3.70  qed
3.71
3.72  lemma (in finite_measure_space) finite_measure_space_finite_prod_measure_alterantive:
3.73 @@ -551,4 +540,18 @@
3.74    unfolding finite_prod_measure_space[OF N, symmetric]
3.75    using finite_measure_space_finite_prod_measure[OF N] .
3.76
3.77 +lemma prod_measure_times_finite:
3.78 +  assumes fms: "finite_measure_space M \<mu>" "finite_measure_space N \<nu>" and a: "a \<in> space M \<times> space N"
3.79 +  shows "prod_measure M \<mu> N \<nu> {a} = \<mu> {fst a} * \<nu> {snd a}"
3.80 +proof (cases a)
3.81 +  case (Pair b c)
3.82 +  hence a_eq: "{a} = {b} \<times> {c}" by simp
3.83 +
3.84 +  interpret M: finite_measure_space M \<mu> by fact
3.85 +  interpret N: finite_measure_space N \<nu> by fact
3.86 +
3.87 +  from finite_measure_space.finite_prod_measure_times[OF fms, of "{b}" "{c}"] M.sets_eq_Pow N.sets_eq_Pow a Pair
3.88 +  show ?thesis unfolding a_eq by simp
3.89 +qed
3.90 +
3.91  end
```
```     4.1 --- a/src/HOL/Probability/Radon_Nikodym.thy	Thu Sep 02 17:28:00 2010 +0200
4.2 +++ b/src/HOL/Probability/Radon_Nikodym.thy	Thu Sep 02 19:51:53 2010 +0200
4.3 @@ -64,6 +64,30 @@
4.4  definition (in measure_space)
4.5    "absolutely_continuous \<nu> = (\<forall>N\<in>null_sets. \<nu> N = (0 :: pinfreal))"
4.6
4.7 +lemma (in finite_measure_space) absolutely_continuousI:
4.8 +  assumes "finite_measure_space M \<nu>"
4.9 +  assumes v: "\<And>x. \<lbrakk> x \<in> space M ; \<mu> {x} = 0 \<rbrakk> \<Longrightarrow> \<nu> {x} = 0"
4.10 +  shows "absolutely_continuous \<nu>"
4.11 +proof (unfold absolutely_continuous_def sets_eq_Pow, safe)
4.12 +  fix N assume "\<mu> N = 0" "N \<subseteq> space M"
4.13 +  interpret v: finite_measure_space M \<nu> by fact
4.14 +  have "\<nu> N = \<nu> (\<Union>x\<in>N. {x})" by simp
4.15 +  also have "\<dots> = (\<Sum>x\<in>N. \<nu> {x})"
4.16 +  proof (rule v.measure_finitely_additive''[symmetric])
4.17 +    show "finite N" using `N \<subseteq> space M` finite_space by (auto intro: finite_subset)
4.18 +    show "disjoint_family_on (\<lambda>i. {i}) N" unfolding disjoint_family_on_def by auto
4.19 +    fix x assume "x \<in> N" thus "{x} \<in> sets M" using `N \<subseteq> space M` sets_eq_Pow by auto
4.20 +  qed
4.21 +  also have "\<dots> = 0"
4.22 +  proof (safe intro!: setsum_0')
4.23 +    fix x assume "x \<in> N"
4.24 +    hence "\<mu> {x} \<le> \<mu> N" using sets_eq_Pow `N \<subseteq> space M` by (auto intro!: measure_mono)
4.25 +    hence "\<mu> {x} = 0" using `\<mu> N = 0` by simp
4.26 +    thus "\<nu> {x} = 0" using v[of x] `x \<in> N` `N \<subseteq> space M` by auto
4.27 +  qed
4.28 +  finally show "\<nu> N = 0" .
4.29 +qed
4.30 +
4.31  lemma (in finite_measure) Radon_Nikodym_aux_epsilon:
4.32    fixes e :: real assumes "0 < e"
4.33    assumes "finite_measure M s"
```