Moved lemmas to appropriate locations
authorhoelzl
Thu Sep 02 19:51:53 2010 +0200 (2010-09-02)
changeset 39097943c7b348524
parent 39096 111756225292
child 39098 21e9bd6cf0a8
Moved lemmas to appropriate locations
src/HOL/Probability/Information.thy
src/HOL/Probability/Probability_Space.thy
src/HOL/Probability/Product_Measure.thy
src/HOL/Probability/Radon_Nikodym.thy
     1.1 --- a/src/HOL/Probability/Information.thy	Thu Sep 02 17:28:00 2010 +0200
     1.2 +++ b/src/HOL/Probability/Information.thy	Thu Sep 02 19:51:53 2010 +0200
     1.3 @@ -2,11 +2,53 @@
     1.4  imports Probability_Space Product_Measure Convex Radon_Nikodym
     1.5  begin
     1.6  
     1.7 +lemma log_le: "1 < a \<Longrightarrow> 0 < x \<Longrightarrow> x \<le> y \<Longrightarrow> log a x \<le> log a y"
     1.8 +  by (subst log_le_cancel_iff) auto
     1.9 +
    1.10 +lemma log_less: "1 < a \<Longrightarrow> 0 < x \<Longrightarrow> x < y \<Longrightarrow> log a x < log a y"
    1.11 +  by (subst log_less_cancel_iff) auto
    1.12 +
    1.13 +lemma setsum_cartesian_product':
    1.14 +  "(\<Sum>x\<in>A \<times> B. f x) = (\<Sum>x\<in>A. setsum (\<lambda>y. f (x, y)) B)"
    1.15 +  unfolding setsum_cartesian_product by simp
    1.16 +
    1.17  lemma real_of_pinfreal_inverse[simp]:
    1.18    fixes X :: pinfreal
    1.19    shows "real (inverse X) = 1 / real X"
    1.20    by (cases X) (auto simp: inverse_eq_divide)
    1.21  
    1.22 +lemma (in finite_prob_space) finite_product_prob_space_of_images:
    1.23 +  "finite_prob_space \<lparr> space = X ` space M \<times> Y ` space M, sets = Pow (X ` space M \<times> Y ` space M)\<rparr>
    1.24 +                     (joint_distribution X Y)"
    1.25 +  (is "finite_prob_space ?S _")
    1.26 +proof (simp add: finite_prob_space_eq finite_product_measure_space_of_images)
    1.27 +  have "X -` X ` space M \<inter> Y -` Y ` space M \<inter> space M = space M" by auto
    1.28 +  thus "joint_distribution X Y (X ` space M \<times> Y ` space M) = 1"
    1.29 +    by (simp add: distribution_def prob_space vimage_Times comp_def measure_space_1)
    1.30 +qed
    1.31 +
    1.32 +lemma (in finite_prob_space) finite_measure_space_prod:
    1.33 +  assumes X: "finite_measure_space MX (distribution X)"
    1.34 +  assumes Y: "finite_measure_space MY (distribution Y)"
    1.35 +  shows "finite_measure_space (prod_measure_space MX MY) (joint_distribution X Y)"
    1.36 +    (is "finite_measure_space ?M ?D")
    1.37 +proof (intro finite_measure_spaceI)
    1.38 +  interpret X: finite_measure_space MX "distribution X" by fact
    1.39 +  interpret Y: finite_measure_space MY "distribution Y" by fact
    1.40 +  note finite_measure_space.finite_prod_measure_space[OF X Y, simp]
    1.41 +  show "finite (space ?M)" using X.finite_space Y.finite_space by auto
    1.42 +  show "joint_distribution X Y {} = 0" by simp
    1.43 +  show "sets ?M = Pow (space ?M)" by simp
    1.44 +  { fix x show "?D (space ?M) \<noteq> \<omega>" by (rule distribution_finite) }
    1.45 +  { fix A B assume "A \<subseteq> space ?M" "B \<subseteq> space ?M" "A \<inter> B = {}"
    1.46 +    have *: "(\<lambda>t. (X t, Y t)) -` (A \<union> B) \<inter> space M =
    1.47 +             (\<lambda>t. (X t, Y t)) -` A \<inter> space M \<union> (\<lambda>t. (X t, Y t)) -` B \<inter> space M"
    1.48 +      by auto
    1.49 +    show "?D (A \<union> B) = ?D A + ?D B" unfolding distribution_def *
    1.50 +      apply (rule measure_additive[symmetric])
    1.51 +      using `A \<inter> B = {}` by (auto simp: sets_eq_Pow) }
    1.52 +qed
    1.53 +
    1.54  section "Convex theory"
    1.55  
    1.56  lemma log_setsum:
    1.57 @@ -105,51 +147,19 @@
    1.58    finally show ?thesis .
    1.59  qed
    1.60  
    1.61 -lemma (in finite_prob_space) sum_over_space_distrib:
    1.62 -  "(\<Sum>x\<in>X`space M. distribution X {x}) = 1"
    1.63 -  unfolding distribution_def measure_space_1[symmetric] using finite_space
    1.64 -  by (subst measure_finitely_additive'')
    1.65 -     (auto simp add: disjoint_family_on_def sets_eq_Pow intro!: arg_cong[where f=\<mu>])
    1.66 -
    1.67 -lemma (in finite_prob_space) sum_over_space_real_distribution:
    1.68 -  "(\<Sum>x\<in>X`space M. real (distribution X {x})) = 1"
    1.69 -  unfolding distribution_def prob_space[symmetric] using finite_space
    1.70 -  by (subst real_finite_measure_finite_Union[symmetric])
    1.71 -     (auto simp add: disjoint_family_on_def sets_eq_Pow intro!: arg_cong[where f=prob])
    1.72 +lemma split_pairs:
    1.73 +  shows
    1.74 +    "((A, B) = X) \<longleftrightarrow> (fst X = A \<and> snd X = B)" and
    1.75 +    "(X = (A, B)) \<longleftrightarrow> (fst X = A \<and> snd X = B)" by auto
    1.76  
    1.77  section "Information theory"
    1.78  
    1.79 -definition
    1.80 -  "KL_divergence b M \<mu> \<nu> =
    1.81 -    measure_space.integral M \<mu> (\<lambda>x. log b (real (sigma_finite_measure.RN_deriv M \<nu> \<mu> x)))"
    1.82 -
    1.83  locale finite_information_space = finite_prob_space +
    1.84    fixes b :: real assumes b_gt_1: "1 < b"
    1.85  
    1.86 -lemma (in finite_prob_space) distribution_mono:
    1.87 -  assumes "\<And>t. \<lbrakk> t \<in> space M ; X t \<in> x \<rbrakk> \<Longrightarrow> Y t \<in> y"
    1.88 -  shows "distribution X x \<le> distribution Y y"
    1.89 -  unfolding distribution_def
    1.90 -  using assms by (auto simp: sets_eq_Pow intro!: measure_mono)
    1.91 -
    1.92 -lemma (in prob_space) distribution_remove_const:
    1.93 -  shows "joint_distribution X (\<lambda>x. ()) {(x, ())} = distribution X {x}"
    1.94 -  and "joint_distribution (\<lambda>x. ()) X {((), x)} = distribution X {x}"
    1.95 -  and "joint_distribution X (\<lambda>x. (Y x, ())) {(x, y, ())} = joint_distribution X Y {(x, y)}"
    1.96 -  and "joint_distribution X (\<lambda>x. ((), Y x)) {(x, (), y)} = joint_distribution X Y {(x, y)}"
    1.97 -  and "distribution (\<lambda>x. ()) {()} = 1"
    1.98 -  unfolding measure_space_1[symmetric]
    1.99 -  by (auto intro!: arg_cong[where f="\<mu>"] simp: distribution_def)
   1.100 -
   1.101  context finite_information_space
   1.102  begin
   1.103  
   1.104 -lemma distribution_mono_gt_0:
   1.105 -  assumes gt_0: "0 < distribution X x"
   1.106 -  assumes *: "\<And>t. \<lbrakk> t \<in> space M ; X t \<in> x \<rbrakk> \<Longrightarrow> Y t \<in> y"
   1.107 -  shows "0 < distribution Y y"
   1.108 -  by (rule less_le_trans[OF gt_0 distribution_mono]) (rule *)
   1.109 -
   1.110  lemma
   1.111    assumes "0 \<le> A" and pos: "0 < A \<Longrightarrow> 0 < B" "0 < A \<Longrightarrow> 0 < C"
   1.112    shows mult_log_mult: "A * log b (B * C) = A * log b B + A * log b C" (is "?mult")
   1.113 @@ -165,41 +175,6 @@
   1.114    thus ?mult and ?div by auto
   1.115  qed
   1.116  
   1.117 -lemma split_pairs:
   1.118 -  shows
   1.119 -    "((A, B) = X) \<longleftrightarrow> (fst X = A \<and> snd X = B)" and
   1.120 -    "(X = (A, B)) \<longleftrightarrow> (fst X = A \<and> snd X = B)" by auto
   1.121 -
   1.122 -lemma (in finite_information_space) distribution_finite:
   1.123 -  "distribution X A \<noteq> \<omega>"
   1.124 -  using measure_finite[of "X -` A \<inter> space M"]
   1.125 -  unfolding distribution_def sets_eq_Pow by auto
   1.126 -
   1.127 -lemma (in finite_information_space) real_distribution_gt_0[simp]:
   1.128 -  "0 < real (distribution Y y) \<longleftrightarrow>  0 < distribution Y y"
   1.129 -  using assms by (auto intro!: real_pinfreal_pos distribution_finite)
   1.130 -
   1.131 -lemma real_distribution_mult_pos_pos:
   1.132 -  assumes "0 < distribution Y y"
   1.133 -  and "0 < distribution X x"
   1.134 -  shows "0 < real (distribution Y y * distribution X x)"
   1.135 -  unfolding real_of_pinfreal_mult[symmetric]
   1.136 -  using assms by (auto intro!: mult_pos_pos)
   1.137 -
   1.138 -lemma real_distribution_divide_pos_pos:
   1.139 -  assumes "0 < distribution Y y"
   1.140 -  and "0 < distribution X x"
   1.141 -  shows "0 < real (distribution Y y / distribution X x)"
   1.142 -  unfolding divide_pinfreal_def real_of_pinfreal_mult[symmetric]
   1.143 -  using assms distribution_finite[of X x] by (cases "distribution X x") (auto intro!: mult_pos_pos)
   1.144 -
   1.145 -lemma real_distribution_mult_inverse_pos_pos:
   1.146 -  assumes "0 < distribution Y y"
   1.147 -  and "0 < distribution X x"
   1.148 -  shows "0 < real (distribution Y y * inverse (distribution X x))"
   1.149 -  unfolding divide_pinfreal_def real_of_pinfreal_mult[symmetric]
   1.150 -  using assms distribution_finite[of X x] by (cases "distribution X x") (auto intro!: mult_pos_pos)
   1.151 -
   1.152  ML {*
   1.153  
   1.154    (* tactic to solve equations of the form @{term "W * log b (X / (Y * Z)) = W * log b X - W * log b (Y * Z)"}
   1.155 @@ -252,31 +227,14 @@
   1.156  
   1.157  end
   1.158  
   1.159 -lemma (in finite_measure_space) absolutely_continuousI:
   1.160 -  assumes "finite_measure_space M \<nu>"
   1.161 -  assumes v: "\<And>x. \<lbrakk> x \<in> space M ; \<mu> {x} = 0 \<rbrakk> \<Longrightarrow> \<nu> {x} = 0"
   1.162 -  shows "absolutely_continuous \<nu>"
   1.163 -proof (unfold absolutely_continuous_def sets_eq_Pow, safe)
   1.164 -  fix N assume "\<mu> N = 0" "N \<subseteq> space M"
   1.165 -
   1.166 -  interpret v: finite_measure_space M \<nu> by fact
   1.167 +subsection "Kullback$-$Leibler divergence"
   1.168  
   1.169 -  have "\<nu> N = \<nu> (\<Union>x\<in>N. {x})" by simp
   1.170 -  also have "\<dots> = (\<Sum>x\<in>N. \<nu> {x})"
   1.171 -  proof (rule v.measure_finitely_additive''[symmetric])
   1.172 -    show "finite N" using `N \<subseteq> space M` finite_space by (auto intro: finite_subset)
   1.173 -    show "disjoint_family_on (\<lambda>i. {i}) N" unfolding disjoint_family_on_def by auto
   1.174 -    fix x assume "x \<in> N" thus "{x} \<in> sets M" using `N \<subseteq> space M` sets_eq_Pow by auto
   1.175 -  qed
   1.176 -  also have "\<dots> = 0"
   1.177 -  proof (safe intro!: setsum_0')
   1.178 -    fix x assume "x \<in> N"
   1.179 -    hence "\<mu> {x} \<le> \<mu> N" using sets_eq_Pow `N \<subseteq> space M` by (auto intro!: measure_mono)
   1.180 -    hence "\<mu> {x} = 0" using `\<mu> N = 0` by simp
   1.181 -    thus "\<nu> {x} = 0" using v[of x] `x \<in> N` `N \<subseteq> space M` by auto
   1.182 -  qed
   1.183 -  finally show "\<nu> N = 0" .
   1.184 -qed
   1.185 +text {* The Kullback$-$Leibler divergence is also known as relative entropy or
   1.186 +Kullback$-$Leibler distance. *}
   1.187 +
   1.188 +definition
   1.189 +  "KL_divergence b M \<mu> \<nu> =
   1.190 +    measure_space.integral M \<mu> (\<lambda>x. log b (real (sigma_finite_measure.RN_deriv M \<nu> \<mu> x)))"
   1.191  
   1.192  lemma (in finite_measure_space) KL_divergence_eq_finite:
   1.193    assumes v: "finite_measure_space M \<nu>"
   1.194 @@ -285,19 +243,13 @@
   1.195  proof (simp add: KL_divergence_def finite_measure_space.integral_finite_singleton[OF v])
   1.196    interpret v: finite_measure_space M \<nu> by fact
   1.197    have ms: "measure_space M \<nu>" by fact
   1.198 -
   1.199    have ac: "absolutely_continuous \<nu>"
   1.200      using ac by (auto intro!: absolutely_continuousI[OF v])
   1.201 -
   1.202    show "(\<Sum>x \<in> space M. log b (real (RN_deriv \<nu> x)) * real (\<nu> {x})) = ?sum"
   1.203      using RN_deriv_finite_measure[OF ms ac]
   1.204      by (auto intro!: setsum_cong simp: field_simps real_of_pinfreal_mult[symmetric])
   1.205  qed
   1.206  
   1.207 -lemma (in finite_prob_space) finite_sum_over_space_eq_1:
   1.208 -  "(\<Sum>x\<in>space M. real (\<mu> {x})) = 1"
   1.209 -  using sum_over_space_eq_1 finite_measure by (simp add: real_of_pinfreal_setsum sets_eq_Pow)
   1.210 -
   1.211  lemma (in finite_prob_space) KL_divergence_positive_finite:
   1.212    assumes v: "finite_prob_space M \<nu>"
   1.213    assumes ac: "\<And>x. \<lbrakk> x \<in> space M ; \<mu> {x} = 0 \<rbrakk> \<Longrightarrow> \<nu> {x} = 0"
   1.214 @@ -322,13 +274,15 @@
   1.215        fix x assume x: "x \<in> space M"
   1.216        { assume "0 < real (\<nu> {x})"
   1.217          hence "\<mu> {x} \<noteq> 0" using ac[OF x] by auto
   1.218 -        thus "0 < prob {x}" using measure_finite[of "{x}"] sets_eq_Pow x
   1.219 +        thus "0 < prob {x}" using finite_measure[of "{x}"] sets_eq_Pow x
   1.220            by (cases "\<mu> {x}") simp_all }
   1.221      qed auto
   1.222    qed
   1.223    thus "0 \<le> KL_divergence b M \<nu> \<mu>" using finite_sum_over_space_eq_1 by simp
   1.224  qed
   1.225  
   1.226 +subsection {* Mutual Information *}
   1.227 +
   1.228  definition (in prob_space)
   1.229    "mutual_information b S T X Y =
   1.230      KL_divergence b (prod_measure_space S T)
   1.231 @@ -341,24 +295,6 @@
   1.232      \<lparr> space = X`space M, sets = Pow (X`space M) \<rparr>
   1.233      \<lparr> space = Y`space M, sets = Pow (Y`space M) \<rparr> X Y"
   1.234  
   1.235 -lemma prod_measure_times_finite:
   1.236 -  assumes fms: "finite_measure_space M \<mu>" "finite_measure_space N \<nu>" and a: "a \<in> space M \<times> space N"
   1.237 -  shows "prod_measure M \<mu> N \<nu> {a} = \<mu> {fst a} * \<nu> {snd a}"
   1.238 -proof (cases a)
   1.239 -  case (Pair b c)
   1.240 -  hence a_eq: "{a} = {b} \<times> {c}" by simp
   1.241 -
   1.242 -  interpret M: finite_measure_space M \<mu> by fact
   1.243 -  interpret N: finite_measure_space N \<nu> by fact
   1.244 -
   1.245 -  from finite_measure_space.finite_prod_measure_times[OF fms, of "{b}" "{c}"] M.sets_eq_Pow N.sets_eq_Pow a Pair
   1.246 -  show ?thesis unfolding a_eq by simp
   1.247 -qed
   1.248 -
   1.249 -lemma setsum_cartesian_product':
   1.250 -  "(\<Sum>x\<in>A \<times> B. f x) = (\<Sum>x\<in>A. setsum (\<lambda>y. f (x, y)) B)"
   1.251 -  unfolding setsum_cartesian_product by simp
   1.252 -
   1.253  lemma (in finite_information_space) mutual_information_generic_eq:
   1.254    assumes MX: "finite_measure_space MX (distribution X)"
   1.255    assumes MY: "finite_measure_space MY (distribution Y)"
   1.256 @@ -478,9 +414,26 @@
   1.257                                                     (real (distribution X {x}) * real (distribution Y {y}))))"
   1.258    by (subst mutual_information_eq_generic) (simp_all add: finite_prob_space_of_images)
   1.259  
   1.260 +lemma (in finite_information_space) mutual_information_cong:
   1.261 +  assumes X: "\<And>x. x \<in> space M \<Longrightarrow> X x = X' x"
   1.262 +  assumes Y: "\<And>x. x \<in> space M \<Longrightarrow> Y x = Y' x"
   1.263 +  shows "\<I>(X ; Y) = \<I>(X' ; Y')"
   1.264 +proof -
   1.265 +  have "X ` space M = X' ` space M" using X by (auto intro!: image_eqI)
   1.266 +  moreover have "Y ` space M = Y' ` space M" using Y by (auto intro!: image_eqI)
   1.267 +  ultimately show ?thesis
   1.268 +  unfolding mutual_information_eq
   1.269 +    using
   1.270 +      assms[THEN distribution_cong]
   1.271 +      joint_distribution_cong[OF assms]
   1.272 +    by (auto intro!: setsum_cong)
   1.273 +qed
   1.274 +
   1.275  lemma (in finite_information_space) mutual_information_positive: "0 \<le> \<I>(X;Y)"
   1.276    by (subst mutual_information_positive_generic) (simp_all add: finite_prob_space_of_images)
   1.277  
   1.278 +subsection {* Entropy *}
   1.279 +
   1.280  definition (in prob_space)
   1.281    "entropy b s X = mutual_information b s s X X"
   1.282  
   1.283 @@ -488,32 +441,146 @@
   1.284    finite_entropy ("\<H>'(_')") where
   1.285    "\<H>(X) \<equiv> entropy b \<lparr> space = X`space M, sets = Pow (X`space M) \<rparr> X"
   1.286  
   1.287 -lemma (in finite_information_space) joint_distribution_remove[simp]:
   1.288 -    "joint_distribution X X {(x, x)} = distribution X {x}"
   1.289 -  unfolding distribution_def by (auto intro!: arg_cong[where f="\<mu>"])
   1.290 +lemma (in finite_information_space) entropy_generic_eq:
   1.291 +  assumes MX: "finite_measure_space MX (distribution X)"
   1.292 +  shows "entropy b MX X = -(\<Sum> x \<in> space MX. real (distribution X {x}) * log b (real (distribution X {x})))"
   1.293 +proof -
   1.294 +  let "?X x" = "real (distribution X {x})"
   1.295 +  let "?XX x y" = "real (joint_distribution X X {(x, y)})"
   1.296 +  interpret MX: finite_measure_space MX "distribution X" by fact
   1.297 +  { fix x y
   1.298 +    have "(\<lambda>x. (X x, X x)) -` {(x, y)} = (if x = y then X -` {x} else {})" by auto
   1.299 +    then have "?XX x y * log b (?XX x y / (?X x * ?X y)) =
   1.300 +        (if x = y then - ?X y * log b (?X y) else 0)"
   1.301 +      unfolding distribution_def by (auto simp: mult_log_divide) }
   1.302 +  note remove_XX = this
   1.303 +  show ?thesis
   1.304 +    unfolding entropy_def mutual_information_generic_eq[OF MX MX]
   1.305 +    unfolding setsum_cartesian_product[symmetric] setsum_negf[symmetric] remove_XX
   1.306 +    by (auto simp: setsum_cases MX.finite_space)
   1.307 +qed
   1.308  
   1.309  lemma (in finite_information_space) entropy_eq:
   1.310    "\<H>(X) = -(\<Sum> x \<in> X ` space M. real (distribution X {x}) * log b (real (distribution X {x})))"
   1.311 -proof -
   1.312 -  { fix f
   1.313 -    { fix x y
   1.314 -      have "(\<lambda>x. (X x, X x)) -` {(x, y)} = (if x = y then X -` {x} else {})" by auto
   1.315 -        hence "real (distribution (\<lambda>x. (X x, X x))  {(x,y)}) * f x y =
   1.316 -            (if x = y then real (distribution X {x}) * f x y else 0)"
   1.317 -        unfolding distribution_def by auto }
   1.318 -    hence "(\<Sum>(x, y) \<in> X ` space M \<times> X ` space M. real (joint_distribution X X {(x, y)}) * f x y) =
   1.319 -      (\<Sum>x \<in> X ` space M. real (distribution X {x}) * f x x)"
   1.320 -      unfolding setsum_cartesian_product' by (simp add: setsum_cases finite_space) }
   1.321 -  note remove_cartesian_product = this
   1.322 -
   1.323 -  show ?thesis
   1.324 -    unfolding entropy_def mutual_information_eq setsum_negf[symmetric] remove_cartesian_product
   1.325 -    by (auto intro!: setsum_cong)
   1.326 -qed
   1.327 +  by (simp add: finite_measure_space entropy_generic_eq)
   1.328  
   1.329  lemma (in finite_information_space) entropy_positive: "0 \<le> \<H>(X)"
   1.330    unfolding entropy_def using mutual_information_positive .
   1.331  
   1.332 +lemma (in finite_information_space) entropy_certainty_eq_0:
   1.333 +  assumes "x \<in> X ` space M" and "distribution X {x} = 1"
   1.334 +  shows "\<H>(X) = 0"
   1.335 +proof -
   1.336 +  interpret X: finite_prob_space "\<lparr> space = X ` space M, sets = Pow (X ` space M) \<rparr>" "distribution X"
   1.337 +    by (rule finite_prob_space_of_images)
   1.338 +
   1.339 +  have "distribution X (X ` space M - {x}) = distribution X (X ` space M) - distribution X {x}"
   1.340 +    using X.measure_compl[of "{x}"] assms by auto
   1.341 +  also have "\<dots> = 0" using X.prob_space assms by auto
   1.342 +  finally have X0: "distribution X (X ` space M - {x}) = 0" by auto
   1.343 +
   1.344 +  { fix y assume asm: "y \<noteq> x" "y \<in> X ` space M"
   1.345 +    hence "{y} \<subseteq> X ` space M - {x}" by auto
   1.346 +    from X.measure_mono[OF this] X0 asm
   1.347 +    have "distribution X {y} = 0" by auto }
   1.348 +
   1.349 +  hence fi: "\<And> y. y \<in> X ` space M \<Longrightarrow> real (distribution X {y}) = (if x = y then 1 else 0)"
   1.350 +    using assms by auto
   1.351 +
   1.352 +  have y: "\<And>y. (if x = y then 1 else 0) * log b (if x = y then 1 else 0) = 0" by simp
   1.353 +
   1.354 +  show ?thesis unfolding entropy_eq by (auto simp: y fi)
   1.355 +qed
   1.356 +
   1.357 +lemma (in finite_information_space) entropy_le_card_not_0:
   1.358 +  "\<H>(X) \<le> log b (real (card (X ` space M \<inter> {x . distribution X {x} \<noteq> 0})))"
   1.359 +proof -
   1.360 +  let "?d x" = "distribution X {x}"
   1.361 +  let "?p x" = "real (?d x)"
   1.362 +  have "\<H>(X) = (\<Sum>x\<in>X`space M. ?p x * log b (1 / ?p x))"
   1.363 +    by (auto intro!: setsum_cong simp: entropy_eq setsum_negf[symmetric])
   1.364 +  also have "\<dots> \<le> log b (\<Sum>x\<in>X`space M. ?p x * (1 / ?p x))"
   1.365 +    apply (rule log_setsum')
   1.366 +    using not_empty b_gt_1 finite_space sum_over_space_real_distribution
   1.367 +    by auto
   1.368 +  also have "\<dots> = log b (\<Sum>x\<in>X`space M. if ?d x \<noteq> 0 then 1 else 0)"
   1.369 +    apply (rule arg_cong[where f="\<lambda>f. log b (\<Sum>x\<in>X`space M. f x)"])
   1.370 +    using distribution_finite[of X] by (auto simp: expand_fun_eq real_of_pinfreal_eq_0)
   1.371 +  finally show ?thesis
   1.372 +    using finite_space by (auto simp: setsum_cases real_eq_of_nat)
   1.373 +qed
   1.374 +
   1.375 +lemma (in finite_information_space) entropy_uniform_max:
   1.376 +  assumes "\<And>x y. \<lbrakk> x \<in> X ` space M ; y \<in> X ` space M \<rbrakk> \<Longrightarrow> distribution X {x} = distribution X {y}"
   1.377 +  shows "\<H>(X) = log b (real (card (X ` space M)))"
   1.378 +proof -
   1.379 +  note uniform =
   1.380 +    finite_prob_space_of_images[of X, THEN finite_prob_space.uniform_prob, simplified]
   1.381 +
   1.382 +  have card_gt0: "0 < card (X ` space M)" unfolding card_gt_0_iff
   1.383 +    using finite_space not_empty by auto
   1.384 +
   1.385 +  { fix x assume "x \<in> X ` space M"
   1.386 +    hence "real (distribution X {x}) = 1 / real (card (X ` space M))"
   1.387 +    proof (rule uniform)
   1.388 +      fix x y assume "x \<in> X`space M" "y \<in> X`space M"
   1.389 +      from assms[OF this] show "real (distribution X {x}) = real (distribution X {y})" by simp
   1.390 +    qed }
   1.391 +  thus ?thesis
   1.392 +    using not_empty finite_space b_gt_1 card_gt0
   1.393 +    by (simp add: entropy_eq real_eq_of_nat[symmetric] log_divide)
   1.394 +qed
   1.395 +
   1.396 +lemma (in finite_information_space) entropy_le_card:
   1.397 +  "\<H>(X) \<le> log b (real (card (X ` space M)))"
   1.398 +proof cases
   1.399 +  assume "X ` space M \<inter> {x. distribution X {x} \<noteq> 0} = {}"
   1.400 +  then have "\<And>x. x\<in>X`space M \<Longrightarrow> distribution X {x} = 0" by auto
   1.401 +  moreover
   1.402 +  have "0 < card (X`space M)"
   1.403 +    using finite_space not_empty unfolding card_gt_0_iff by auto
   1.404 +  then have "log b 1 \<le> log b (real (card (X`space M)))"
   1.405 +    using b_gt_1 by (intro log_le) auto
   1.406 +  ultimately show ?thesis unfolding entropy_eq by simp
   1.407 +next
   1.408 +  assume False: "X ` space M \<inter> {x. distribution X {x} \<noteq> 0} \<noteq> {}"
   1.409 +  have "card (X ` space M \<inter> {x. distribution X {x} \<noteq> 0}) \<le> card (X ` space M)"
   1.410 +    (is "?A \<le> ?B") using finite_space not_empty by (auto intro!: card_mono)
   1.411 +  note entropy_le_card_not_0
   1.412 +  also have "log b (real ?A) \<le> log b (real ?B)"
   1.413 +    using b_gt_1 False finite_space not_empty `?A \<le> ?B`
   1.414 +    by (auto intro!: log_le simp: card_gt_0_iff)
   1.415 +  finally show ?thesis .
   1.416 +qed
   1.417 +
   1.418 +lemma (in finite_information_space) entropy_commute:
   1.419 +  "\<H>(\<lambda>x. (X x, Y x)) = \<H>(\<lambda>x. (Y x, X x))"
   1.420 +proof -
   1.421 +  have *: "(\<lambda>x. (Y x, X x))`space M = (\<lambda>(a,b). (b,a))`(\<lambda>x. (X x, Y x))`space M"
   1.422 +    by auto
   1.423 +  have inj: "\<And>X. inj_on (\<lambda>(a,b). (b,a)) X"
   1.424 +    by (auto intro!: inj_onI)
   1.425 +  show ?thesis
   1.426 +    unfolding entropy_eq unfolding * setsum_reindex[OF inj]
   1.427 +    by (simp add: joint_distribution_commute[of Y X] split_beta)
   1.428 +qed
   1.429 +
   1.430 +lemma (in finite_information_space) entropy_eq_cartesian_sum:
   1.431 +  "\<H>(\<lambda>x. (X x, Y x)) = -(\<Sum>x\<in>X`space M. \<Sum>y\<in>Y`space M.
   1.432 +    real (joint_distribution X Y {(x,y)}) *
   1.433 +    log b (real (joint_distribution X Y {(x,y)})))"
   1.434 +proof -
   1.435 +  { fix x assume "x\<notin>(\<lambda>x. (X x, Y x))`space M"
   1.436 +    then have "(\<lambda>x. (X x, Y x)) -` {x} \<inter> space M = {}" by auto
   1.437 +    then have "joint_distribution X Y {x} = 0"
   1.438 +      unfolding distribution_def by auto }
   1.439 +  then show ?thesis using finite_space
   1.440 +    unfolding entropy_eq neg_equal_iff_equal setsum_cartesian_product
   1.441 +    by (auto intro!: setsum_mono_zero_cong_left)
   1.442 +qed
   1.443 +
   1.444 +subsection {* Conditional Mutual Information *}
   1.445 +
   1.446  definition (in prob_space)
   1.447    "conditional_mutual_information b M1 M2 M3 X Y Z \<equiv>
   1.448      mutual_information b M1 (prod_measure_space M2 M3) X (\<lambda>x. (Y x, Z x)) -
   1.449 @@ -527,87 +594,32 @@
   1.450      \<lparr> space = Z`space M, sets = Pow (Z`space M) \<rparr>
   1.451      X Y Z"
   1.452  
   1.453 -lemma (in finite_information_space) setsum_distribution_gen:
   1.454 -  assumes "Z -` {c} \<inter> space M = (\<Union>x \<in> X`space M. Y -` {f x}) \<inter> space M"
   1.455 -  and "inj_on f (X`space M)"
   1.456 -  shows "(\<Sum>x \<in> X`space M. distribution Y {f x}) = distribution Z {c}"
   1.457 -  unfolding distribution_def assms
   1.458 -  using finite_space assms
   1.459 -  by (subst measure_finitely_additive'')
   1.460 -     (auto simp add: disjoint_family_on_def sets_eq_Pow inj_on_def
   1.461 -      intro!: arg_cong[where f=prob])
   1.462 -
   1.463 -lemma (in finite_information_space) setsum_distribution:
   1.464 -  "(\<Sum>x \<in> X`space M. joint_distribution X Y {(x, y)}) = distribution Y {y}"
   1.465 -  "(\<Sum>y \<in> Y`space M. joint_distribution X Y {(x, y)}) = distribution X {x}"
   1.466 -  "(\<Sum>x \<in> X`space M. joint_distribution X (\<lambda>x. (Y x, Z x)) {(x, y, z)}) = joint_distribution Y Z {(y, z)}"
   1.467 -  "(\<Sum>y \<in> Y`space M. joint_distribution X (\<lambda>x. (Y x, Z x)) {(x, y, z)}) = joint_distribution X Z {(x, z)}"
   1.468 -  "(\<Sum>z \<in> Z`space M. joint_distribution X (\<lambda>x. (Y x, Z x)) {(x, y, z)}) = joint_distribution X Y {(x, y)}"
   1.469 -  by (auto intro!: inj_onI setsum_distribution_gen)
   1.470 -
   1.471 -lemma (in finite_information_space) setsum_real_distribution_gen:
   1.472 -  assumes "Z -` {c} \<inter> space M = (\<Union>x \<in> X`space M. Y -` {f x}) \<inter> space M"
   1.473 -  and "inj_on f (X`space M)"
   1.474 -  shows "(\<Sum>x \<in> X`space M. real (distribution Y {f x})) = real (distribution Z {c})"
   1.475 -  unfolding distribution_def assms
   1.476 -  using finite_space assms
   1.477 -  by (subst real_finite_measure_finite_Union[symmetric])
   1.478 -     (auto simp add: disjoint_family_on_def sets_eq_Pow inj_on_def
   1.479 -        intro!: arg_cong[where f=prob])
   1.480 -
   1.481 -lemma (in finite_information_space) setsum_real_distribution:
   1.482 -  "(\<Sum>x \<in> X`space M. real (joint_distribution X Y {(x, y)})) = real (distribution Y {y})"
   1.483 -  "(\<Sum>y \<in> Y`space M. real (joint_distribution X Y {(x, y)})) = real (distribution X {x})"
   1.484 -  "(\<Sum>x \<in> X`space M. real (joint_distribution X (\<lambda>x. (Y x, Z x)) {(x, y, z)})) = real (joint_distribution Y Z {(y, z)})"
   1.485 -  "(\<Sum>y \<in> Y`space M. real (joint_distribution X (\<lambda>x. (Y x, Z x)) {(x, y, z)})) = real (joint_distribution X Z {(x, z)})"
   1.486 -  "(\<Sum>z \<in> Z`space M. real (joint_distribution X (\<lambda>x. (Y x, Z x)) {(x, y, z)})) = real (joint_distribution X Y {(x, y)})"
   1.487 -  by (auto intro!: inj_onI setsum_real_distribution_gen)
   1.488 +lemma (in finite_information_space) conditional_mutual_information_generic_eq:
   1.489 +  assumes MX: "finite_measure_space MX (distribution X)"
   1.490 +  assumes MY: "finite_measure_space MY (distribution Y)"
   1.491 +  assumes MZ: "finite_measure_space MZ (distribution Z)"
   1.492 +  shows "conditional_mutual_information b MX MY MZ X Y Z =
   1.493 +    (\<Sum>(x, y, z)\<in>space MX \<times> space MY \<times> space MZ.
   1.494 +      real (joint_distribution X (\<lambda>x. (Y x, Z x)) {(x, y, z)}) *
   1.495 +      log b (real (joint_distribution X (\<lambda>x. (Y x, Z x)) {(x, y, z)}) /
   1.496 +                   (real (distribution X {x}) * real (joint_distribution Y Z {(y, z)})))) -
   1.497 +    (\<Sum>(x, y)\<in>space MX \<times> space MZ.
   1.498 +      real (joint_distribution X Z {(x, y)}) *
   1.499 +      log b (real (joint_distribution X Z {(x, y)}) / (real (distribution X {x}) * real (distribution Z {y}))))"
   1.500 +  using assms finite_measure_space_prod[OF MY MZ]
   1.501 +  unfolding conditional_mutual_information_def
   1.502 +  by (subst (1 2) mutual_information_generic_eq)
   1.503 +     (simp_all add: setsum_cartesian_product' finite_measure_space.finite_prod_measure_space)
   1.504  
   1.505 -lemma (in finite_information_space) conditional_mutual_information_eq_sum:
   1.506 -   "\<I>(X ; Y | Z) =
   1.507 -     (\<Sum>(x, y, z)\<in>X ` space M \<times> (\<lambda>x. (Y x, Z x)) ` space M.
   1.508 -             real (distribution (\<lambda>x. (X x, Y x, Z x)) {(x, y, z)}) *
   1.509 -             log b (real (distribution (\<lambda>x. (X x, Y x, Z x)) {(x, y, z)})/
   1.510 -        real (distribution (\<lambda>x. (Y x, Z x)) {(y, z)}))) -
   1.511 -     (\<Sum>(x, z)\<in>X ` space M \<times> Z ` space M.
   1.512 -        real (distribution (\<lambda>x. (X x, Z x)) {(x,z)}) * log b (real (distribution (\<lambda>x. (X x, Z x)) {(x,z)}) / real (distribution Z {z})))"
   1.513 -  (is "_ = ?rhs")
   1.514 -proof -
   1.515 -  have setsum_product:
   1.516 -    "\<And>f x. (\<Sum>v\<in>(\<lambda>x. (Y x, Z x)) ` space M. real (joint_distribution X (\<lambda>x. (Y x, Z x)) {(x,v)}) * f v)
   1.517 -      = (\<Sum>v\<in>Y ` space M \<times> Z ` space M. real (joint_distribution X (\<lambda>x. (Y x, Z x)) {(x,v)}) * f v)"
   1.518 -  proof (safe intro!: setsum_mono_zero_cong_left imageI)
   1.519 -    fix x y z f
   1.520 -    assume *: "(Y y, Z z) \<notin> (\<lambda>x. (Y x, Z x)) ` space M" and "y \<in> space M" "z \<in> space M"
   1.521 -    hence "(\<lambda>x. (X x, Y x, Z x)) -` {(x, Y y, Z z)} \<inter> space M = {}"
   1.522 -    proof safe
   1.523 -      fix x' assume x': "x' \<in> space M" and eq: "Y x' = Y y" "Z x' = Z z"
   1.524 -      have "(Y y, Z z) \<in> (\<lambda>x. (Y x, Z x)) ` space M" using eq[symmetric] x' by auto
   1.525 -      thus "x' \<in> {}" using * by auto
   1.526 -    qed
   1.527 -    thus "real (joint_distribution X (\<lambda>x. (Y x, Z x)) {(x, Y y, Z z)}) * f (Y y) (Z z) = 0"
   1.528 -      unfolding distribution_def by simp
   1.529 -  qed (simp add: finite_space)
   1.530 -
   1.531 -  thus ?thesis
   1.532 -    unfolding conditional_mutual_information_def Let_def mutual_information_eq
   1.533 -    by (subst mutual_information_eq_generic)
   1.534 -       (auto simp: prod_measure_space_def sigma_prod_sets_finite finite_space sigma_def
   1.535 -        finite_prob_space_of_images finite_product_prob_space_of_images
   1.536 -        setsum_cartesian_product' setsum_product setsum_subtractf setsum_addf
   1.537 -        setsum_left_distrib[symmetric] setsum_real_distribution
   1.538 -      cong: setsum_cong)
   1.539 -qed
   1.540  
   1.541  lemma (in finite_information_space) conditional_mutual_information_eq:
   1.542    "\<I>(X ; Y | Z) = (\<Sum>(x, y, z) \<in> X ` space M \<times> Y ` space M \<times> Z ` space M.
   1.543               real (distribution (\<lambda>x. (X x, Y x, Z x)) {(x, y, z)}) *
   1.544               log b (real (distribution (\<lambda>x. (X x, Y x, Z x)) {(x, y, z)}) /
   1.545      (real (joint_distribution X Z {(x, z)}) * real (joint_distribution Y Z {(y,z)} / distribution Z {z}))))"
   1.546 -  unfolding conditional_mutual_information_def Let_def mutual_information_eq
   1.547 -  by (subst mutual_information_eq_generic)
   1.548 +  by (subst conditional_mutual_information_generic_eq)
   1.549       (auto simp add: prod_measure_space_def sigma_prod_sets_finite finite_space
   1.550 -      finite_prob_space_of_images finite_product_prob_space_of_images sigma_def
   1.551 +      finite_measure_space finite_product_prob_space_of_images sigma_def
   1.552        setsum_cartesian_product' setsum_product setsum_subtractf setsum_addf
   1.553        setsum_left_distrib[symmetric] setsum_real_distribution setsum_commute[where A="Y`space M"]
   1.554        real_of_pinfreal_mult[symmetric]
   1.555 @@ -623,22 +635,6 @@
   1.556      by (simp add: setsum_cartesian_product' distribution_remove_const)
   1.557  qed
   1.558  
   1.559 -lemma (in finite_prob_space) distribution_finite:
   1.560 -  "distribution X A \<noteq> \<omega>"
   1.561 -  by (auto simp: sets_eq_Pow distribution_def intro!: measure_finite)
   1.562 -
   1.563 -lemma (in finite_prob_space) real_distribution_order:
   1.564 -  shows "r \<le> real (joint_distribution X Y {(x, y)}) \<Longrightarrow> r \<le> real (distribution X {x})"
   1.565 -  and "r \<le> real (joint_distribution X Y {(x, y)}) \<Longrightarrow> r \<le> real (distribution Y {y})"
   1.566 -  and "r < real (joint_distribution X Y {(x, y)}) \<Longrightarrow> r < real (distribution X {x})"
   1.567 -  and "r < real (joint_distribution X Y {(x, y)}) \<Longrightarrow> r < real (distribution Y {y})"
   1.568 -  and "distribution X {x} = 0 \<Longrightarrow> real (joint_distribution X Y {(x, y)}) = 0"
   1.569 -  and "distribution Y {y} = 0 \<Longrightarrow> real (joint_distribution X Y {(x, y)}) = 0"
   1.570 -  using real_of_pinfreal_mono[OF distribution_finite joint_distribution_restriction_fst, of X Y "{(x, y)}"]
   1.571 -  using real_of_pinfreal_mono[OF distribution_finite joint_distribution_restriction_snd, of X Y "{(x, y)}"]
   1.572 -  using real_pinfreal_nonneg[of "joint_distribution X Y {(x, y)}"]
   1.573 -  by auto
   1.574 -
   1.575  lemma (in finite_information_space) conditional_mutual_information_positive:
   1.576    "0 \<le> \<I>(X ; Y | Z)"
   1.577  proof -
   1.578 @@ -682,6 +678,8 @@
   1.579      by (simp add: real_of_pinfreal_mult[symmetric])
   1.580  qed
   1.581  
   1.582 +subsection {* Conditional Entropy *}
   1.583 +
   1.584  definition (in prob_space)
   1.585    "conditional_entropy b S T X Y = conditional_mutual_information b S S T X X Y"
   1.586  
   1.587 @@ -694,19 +692,69 @@
   1.588  lemma (in finite_information_space) conditional_entropy_positive:
   1.589    "0 \<le> \<H>(X | Y)" unfolding conditional_entropy_def using conditional_mutual_information_positive .
   1.590  
   1.591 +lemma (in finite_information_space) conditional_entropy_generic_eq:
   1.592 +  assumes MX: "finite_measure_space MX (distribution X)"
   1.593 +  assumes MY: "finite_measure_space MZ (distribution Z)"
   1.594 +  shows "conditional_entropy b MX MZ X Z =
   1.595 +     - (\<Sum>(x, z)\<in>space MX \<times> space MZ.
   1.596 +         real (joint_distribution X Z {(x, z)}) *
   1.597 +         log b (real (joint_distribution X Z {(x, z)}) / real (distribution Z {z})))"
   1.598 +  unfolding conditional_entropy_def using assms
   1.599 +  apply (simp add: conditional_mutual_information_generic_eq
   1.600 +                   setsum_cartesian_product' setsum_commute[of _ "space MZ"]
   1.601 +                   setsum_negf[symmetric] setsum_subtractf[symmetric])
   1.602 +proof (safe intro!: setsum_cong, simp)
   1.603 +  fix z x assume "z \<in> space MZ" "x \<in> space MX"
   1.604 +  let "?XXZ x'" = "real (joint_distribution X (\<lambda>x. (X x, Z x)) {(x, x', z)})"
   1.605 +  let "?XZ x'" = "real (joint_distribution X Z {(x', z)})"
   1.606 +  let "?X" = "real (distribution X {x})"
   1.607 +  interpret MX: finite_measure_space MX "distribution X" by fact
   1.608 +  have *: "\<And>A. A = {} \<Longrightarrow> prob A = 0" by simp
   1.609 +  have XXZ: "\<And>x'. ?XXZ x' = (if x' = x then ?XZ x else 0)"
   1.610 +    by (auto simp: distribution_def intro!: arg_cong[where f=prob] *)
   1.611 +  have "(\<Sum>x'\<in>space MX. ?XXZ x' * log b (?XXZ x') - (?XXZ x' * log b ?X + ?XXZ x' * log b (?XZ x'))) =
   1.612 +    (\<Sum>x'\<in>{x}. ?XZ x' * log b (?XZ x') - (?XZ x' * log b ?X + ?XZ x' * log b (?XZ x')))"
   1.613 +    using `x \<in> space MX` MX.finite_space
   1.614 +    by (safe intro!: setsum_mono_zero_cong_right)
   1.615 +       (auto split: split_if_asm simp: XXZ)
   1.616 +  then show "(\<Sum>x'\<in>space MX. ?XXZ x' * log b (?XXZ x') - (?XXZ x' * log b ?X + ?XXZ x' * log b (?XZ x'))) +
   1.617 +      ?XZ x * log b ?X = 0" by simp
   1.618 +qed
   1.619 +
   1.620  lemma (in finite_information_space) conditional_entropy_eq:
   1.621    "\<H>(X | Z) =
   1.622       - (\<Sum>(x, z)\<in>X ` space M \<times> Z ` space M.
   1.623           real (joint_distribution X Z {(x, z)}) *
   1.624           log b (real (joint_distribution X Z {(x, z)}) / real (distribution Z {z})))"
   1.625 +  by (simp add: finite_measure_space conditional_entropy_generic_eq)
   1.626 +
   1.627 +lemma (in finite_information_space) conditional_entropy_eq_ce_with_hypothesis:
   1.628 +  "\<H>(X | Y) =
   1.629 +    -(\<Sum>y\<in>Y`space M. real (distribution Y {y}) *
   1.630 +      (\<Sum>x\<in>X`space M. real (joint_distribution X Y {(x,y)}) / real (distribution Y {(y)}) *
   1.631 +              log b (real (joint_distribution X Y {(x,y)}) / real (distribution Y {(y)}))))"
   1.632 +  unfolding conditional_entropy_eq neg_equal_iff_equal
   1.633 +  apply (simp add: setsum_commute[of _ "Y`space M"] setsum_cartesian_product' setsum_divide_distrib[symmetric])
   1.634 +  apply (safe intro!: setsum_cong)
   1.635 +  using real_distribution_order'[of Y _ X _]
   1.636 +  by (auto simp add: setsum_subtractf[of _ _ "X`space M"])
   1.637 +
   1.638 +lemma (in finite_information_space) conditional_entropy_eq_cartesian_sum:
   1.639 +  "\<H>(X | Y) = -(\<Sum>x\<in>X`space M. \<Sum>y\<in>Y`space M.
   1.640 +    real (joint_distribution X Y {(x,y)}) *
   1.641 +    log b (real (joint_distribution X Y {(x,y)}) / real (distribution Y {y})))"
   1.642  proof -
   1.643 -  have *: "\<And>x y z. (\<lambda>x. (X x, X x, Z x)) -` {(x, y, z)} = (if x = y then (\<lambda>x. (X x, Z x)) -` {(x, z)} else {})" by auto
   1.644 -  show ?thesis
   1.645 -    unfolding conditional_mutual_information_eq_sum
   1.646 -      conditional_entropy_def distribution_def *
   1.647 -    by (auto intro!: setsum_0')
   1.648 +  { fix x assume "x\<notin>(\<lambda>x. (X x, Y x))`space M"
   1.649 +    then have "(\<lambda>x. (X x, Y x)) -` {x} \<inter> space M = {}" by auto
   1.650 +    then have "joint_distribution X Y {x} = 0"
   1.651 +      unfolding distribution_def by auto }
   1.652 +  then show ?thesis using finite_space
   1.653 +    unfolding conditional_entropy_eq neg_equal_iff_equal setsum_cartesian_product
   1.654 +    by (auto intro!: setsum_mono_zero_cong_left)
   1.655  qed
   1.656  
   1.657 +subsection {* Equalities *}
   1.658 +
   1.659  lemma (in finite_information_space) mutual_information_eq_entropy_conditional_entropy:
   1.660    "\<I>(X ; Z) = \<H>(X) - \<H>(X | Z)"
   1.661    unfolding mutual_information_eq entropy_eq conditional_entropy_eq
   1.662 @@ -722,109 +770,15 @@
   1.663    show ?thesis by auto
   1.664  qed
   1.665  
   1.666 -(* -------------Entropy of a RV with a certain event is zero---------------- *)
   1.667 -
   1.668 -lemma (in finite_information_space) finite_entropy_certainty_eq_0:
   1.669 -  assumes "x \<in> X ` space M" and "distribution X {x} = 1"
   1.670 -  shows "\<H>(X) = 0"
   1.671 -proof -
   1.672 -  interpret X: finite_prob_space "\<lparr> space = X ` space M, sets = Pow (X ` space M) \<rparr>" "distribution X"
   1.673 -    by (rule finite_prob_space_of_images)
   1.674 -
   1.675 -  have "distribution X (X ` space M - {x}) = distribution X (X ` space M) - distribution X {x}"
   1.676 -    using X.measure_compl[of "{x}"] assms by auto
   1.677 -  also have "\<dots> = 0" using X.prob_space assms by auto
   1.678 -  finally have X0: "distribution X (X ` space M - {x}) = 0" by auto
   1.679 -
   1.680 -  { fix y assume asm: "y \<noteq> x" "y \<in> X ` space M"
   1.681 -    hence "{y} \<subseteq> X ` space M - {x}" by auto
   1.682 -    from X.measure_mono[OF this] X0 asm
   1.683 -    have "distribution X {y} = 0" by auto }
   1.684 -
   1.685 -  hence fi: "\<And> y. y \<in> X ` space M \<Longrightarrow> real (distribution X {y}) = (if x = y then 1 else 0)"
   1.686 -    using assms by auto
   1.687 -
   1.688 -  have y: "\<And>y. (if x = y then 1 else 0) * log b (if x = y then 1 else 0) = 0" by simp
   1.689 -
   1.690 -  show ?thesis unfolding entropy_eq by (auto simp: y fi)
   1.691 -qed
   1.692 -(* --------------- upper bound on entropy for a rv ------------------------- *)
   1.693 -
   1.694 -lemma (in finite_prob_space) distribution_1:
   1.695 -  "distribution X A \<le> 1"
   1.696 -  unfolding distribution_def measure_space_1[symmetric]
   1.697 -  by (auto intro!: measure_mono simp: sets_eq_Pow)
   1.698 -
   1.699 -lemma (in finite_prob_space) real_distribution_1:
   1.700 -  "real (distribution X A) \<le> 1"
   1.701 -  unfolding real_pinfreal_1[symmetric]
   1.702 -  by (rule real_of_pinfreal_mono[OF _ distribution_1]) simp
   1.703 +lemma (in finite_information_space) entropy_chain_rule:
   1.704 +  "\<H>(\<lambda>x. (X x, Y x)) = \<H>(X) + \<H>(Y|X)"
   1.705 +  unfolding entropy_eq[of X] entropy_eq_cartesian_sum conditional_entropy_eq_cartesian_sum
   1.706 +  unfolding setsum_commute[of _ "X`space M"] setsum_negf[symmetric] setsum_addf[symmetric]
   1.707 +  by (rule setsum_cong)
   1.708 +     (simp_all add: setsum_negf setsum_addf setsum_subtractf setsum_real_distribution
   1.709 +                    setsum_left_distrib[symmetric] joint_distribution_commute[of X Y])
   1.710  
   1.711 -lemma (in finite_information_space) finite_entropy_le_card:
   1.712 -  "\<H>(X) \<le> log b (real (card (X ` space M \<inter> {x . distribution X {x} \<noteq> 0})))"
   1.713 -proof -
   1.714 -  let "?d x" = "distribution X {x}"
   1.715 -  let "?p x" = "real (?d x)"
   1.716 -  have "\<H>(X) = (\<Sum>x\<in>X`space M. ?p x * log b (1 / ?p x))"
   1.717 -    by (auto intro!: setsum_cong simp: entropy_eq setsum_negf[symmetric])
   1.718 -  also have "\<dots> \<le> log b (\<Sum>x\<in>X`space M. ?p x * (1 / ?p x))"
   1.719 -    apply (rule log_setsum')
   1.720 -    using not_empty b_gt_1 finite_space sum_over_space_real_distribution
   1.721 -    by auto
   1.722 -  also have "\<dots> = log b (\<Sum>x\<in>X`space M. if ?d x \<noteq> 0 then 1 else 0)"
   1.723 -    apply (rule arg_cong[where f="\<lambda>f. log b (\<Sum>x\<in>X`space M. f x)"])
   1.724 -    using distribution_finite[of X] by (auto simp: expand_fun_eq real_of_pinfreal_eq_0)
   1.725 -  finally show ?thesis
   1.726 -    using finite_space by (auto simp: setsum_cases real_eq_of_nat)
   1.727 -qed
   1.728 -
   1.729 -(* --------------- entropy is maximal for a uniform rv --------------------- *)
   1.730 -
   1.731 -lemma (in finite_prob_space) uniform_prob:
   1.732 -  assumes "x \<in> space M"
   1.733 -  assumes "\<And> x y. \<lbrakk>x \<in> space M ; y \<in> space M\<rbrakk> \<Longrightarrow> prob {x} = prob {y}"
   1.734 -  shows "prob {x} = 1 / real (card (space M))"
   1.735 -proof -
   1.736 -  have prob_x: "\<And> y. y \<in> space M \<Longrightarrow> prob {y} = prob {x}"
   1.737 -    using assms(2)[OF _ `x \<in> space M`] by blast
   1.738 -  have "1 = prob (space M)"
   1.739 -    using prob_space by auto
   1.740 -  also have "\<dots> = (\<Sum> x \<in> space M. prob {x})"
   1.741 -    using real_finite_measure_finite_Union[of "space M" "\<lambda> x. {x}", simplified]
   1.742 -      sets_eq_Pow inj_singleton[unfolded inj_on_def, rule_format]
   1.743 -      finite_space unfolding disjoint_family_on_def  prob_space[symmetric]
   1.744 -    by (auto simp add:setsum_restrict_set)
   1.745 -  also have "\<dots> = (\<Sum> y \<in> space M. prob {x})"
   1.746 -    using prob_x by auto
   1.747 -  also have "\<dots> = real_of_nat (card (space M)) * prob {x}" by simp
   1.748 -  finally have one: "1 = real (card (space M)) * prob {x}"
   1.749 -    using real_eq_of_nat by auto
   1.750 -  hence two: "real (card (space M)) \<noteq> 0" by fastsimp 
   1.751 -  from one have three: "prob {x} \<noteq> 0" by fastsimp
   1.752 -  thus ?thesis using one two three divide_cancel_right
   1.753 -    by (auto simp:field_simps)
   1.754 -qed
   1.755 -
   1.756 -lemma (in finite_information_space) finite_entropy_uniform_max:
   1.757 -  assumes "\<And>x y. \<lbrakk> x \<in> X ` space M ; y \<in> X ` space M \<rbrakk> \<Longrightarrow> distribution X {x} = distribution X {y}"
   1.758 -  shows "\<H>(X) = log b (real (card (X ` space M)))"
   1.759 -proof -
   1.760 -  note uniform =
   1.761 -    finite_prob_space_of_images[of X, THEN finite_prob_space.uniform_prob, simplified]
   1.762 -
   1.763 -  have card_gt0: "0 < card (X ` space M)" unfolding card_gt_0_iff
   1.764 -    using finite_space not_empty by auto
   1.765 -
   1.766 -  { fix x assume "x \<in> X ` space M"
   1.767 -    hence "real (distribution X {x}) = 1 / real (card (X ` space M))"
   1.768 -    proof (rule uniform)
   1.769 -      fix x y assume "x \<in> X`space M" "y \<in> X`space M"
   1.770 -      from assms[OF this] show "real (distribution X {x}) = real (distribution X {y})" by simp
   1.771 -    qed }
   1.772 -  thus ?thesis
   1.773 -    using not_empty finite_space b_gt_1 card_gt0
   1.774 -    by (simp add: entropy_eq real_eq_of_nat[symmetric] log_divide)
   1.775 -qed
   1.776 +section {* Partitioning *}
   1.777  
   1.778  definition "subvimage A f g \<longleftrightarrow> (\<forall>x \<in> A. f -` {f x} \<inter> A \<subseteq> g -` {g x} \<inter> A)"
   1.779  
   1.780 @@ -976,38 +930,6 @@
   1.781    "\<H>(f \<circ> X) \<le> \<H>(X)"
   1.782    by (subst (2) entropy_partition[of _ "f \<circ> X"]) (auto intro: conditional_entropy_positive)
   1.783  
   1.784 -lemma (in prob_space) distribution_cong:
   1.785 -  assumes "\<And>x. x \<in> space M \<Longrightarrow> X x = Y x"
   1.786 -  shows "distribution X = distribution Y"
   1.787 -  unfolding distribution_def expand_fun_eq
   1.788 -  using assms by (auto intro!: arg_cong[where f="\<mu>"])
   1.789 -
   1.790 -lemma (in prob_space) joint_distribution_cong:
   1.791 -  assumes "\<And>x. x \<in> space M \<Longrightarrow> X x = X' x"
   1.792 -  assumes "\<And>x. x \<in> space M \<Longrightarrow> Y x = Y' x"
   1.793 -  shows "joint_distribution X Y = joint_distribution X' Y'"
   1.794 -  unfolding distribution_def expand_fun_eq
   1.795 -  using assms by (auto intro!: arg_cong[where f="\<mu>"])
   1.796 -
   1.797 -lemma image_cong:
   1.798 -  "\<lbrakk> \<And>x. x \<in> S \<Longrightarrow> X x = X' x \<rbrakk> \<Longrightarrow> X ` S = X' ` S"
   1.799 -  by (auto intro!: image_eqI)
   1.800 -
   1.801 -lemma (in finite_information_space) mutual_information_cong:
   1.802 -  assumes X: "\<And>x. x \<in> space M \<Longrightarrow> X x = X' x"
   1.803 -  assumes Y: "\<And>x. x \<in> space M \<Longrightarrow> Y x = Y' x"
   1.804 -  shows "\<I>(X ; Y) = \<I>(X' ; Y')"
   1.805 -proof -
   1.806 -  have "X ` space M = X' ` space M" using X by (rule image_cong)
   1.807 -  moreover have "Y ` space M = Y' ` space M" using Y by (rule image_cong)
   1.808 -  ultimately show ?thesis
   1.809 -  unfolding mutual_information_eq
   1.810 -    using
   1.811 -      assms[THEN distribution_cong]
   1.812 -      joint_distribution_cong[OF assms]
   1.813 -    by (auto intro!: setsum_cong)
   1.814 -qed
   1.815 -
   1.816  corollary (in finite_information_space) entropy_of_inj:
   1.817    assumes "inj_on f (X`space M)"
   1.818    shows "\<H>(f \<circ> X) = \<H>(X)"
     2.1 --- a/src/HOL/Probability/Probability_Space.thy	Thu Sep 02 17:28:00 2010 +0200
     2.2 +++ b/src/HOL/Probability/Probability_Space.thy	Thu Sep 02 19:51:53 2010 +0200
     2.3 @@ -2,8 +2,6 @@
     2.4  imports Lebesgue_Integration Radon_Nikodym
     2.5  begin
     2.6  
     2.7 -
     2.8 -
     2.9  locale prob_space = measure_space +
    2.10    assumes measure_space_1: "\<mu> (space M) = 1"
    2.11  
    2.12 @@ -33,6 +31,19 @@
    2.13  abbreviation
    2.14    "joint_distribution X Y \<equiv> distribution (\<lambda>x. (X x, Y x))"
    2.15  
    2.16 +lemma (in prob_space) distribution_cong:
    2.17 +  assumes "\<And>x. x \<in> space M \<Longrightarrow> X x = Y x"
    2.18 +  shows "distribution X = distribution Y"
    2.19 +  unfolding distribution_def expand_fun_eq
    2.20 +  using assms by (auto intro!: arg_cong[where f="\<mu>"])
    2.21 +
    2.22 +lemma (in prob_space) joint_distribution_cong:
    2.23 +  assumes "\<And>x. x \<in> space M \<Longrightarrow> X x = X' x"
    2.24 +  assumes "\<And>x. x \<in> space M \<Longrightarrow> Y x = Y' x"
    2.25 +  shows "joint_distribution X Y = joint_distribution X' Y'"
    2.26 +  unfolding distribution_def expand_fun_eq
    2.27 +  using assms by (auto intro!: arg_cong[where f="\<mu>"])
    2.28 +
    2.29  lemma prob_space: "prob (space M) = 1"
    2.30    unfolding measure_space_1 by simp
    2.31  
    2.32 @@ -327,18 +338,158 @@
    2.33      joint_distribution_restriction_snd[of X Y "{(x, y)}"]
    2.34    by auto
    2.35  
    2.36 -lemma (in finite_prob_space) finite_prob_space_of_images:
    2.37 -  "finite_prob_space \<lparr> space = X ` space M, sets = Pow (X ` space M)\<rparr> (distribution X)"
    2.38 -  by (simp add: finite_prob_space_eq finite_measure_space)
    2.39 +lemma (in finite_prob_space) distribution_mono:
    2.40 +  assumes "\<And>t. \<lbrakk> t \<in> space M ; X t \<in> x \<rbrakk> \<Longrightarrow> Y t \<in> y"
    2.41 +  shows "distribution X x \<le> distribution Y y"
    2.42 +  unfolding distribution_def
    2.43 +  using assms by (auto simp: sets_eq_Pow intro!: measure_mono)
    2.44 +
    2.45 +lemma (in finite_prob_space) distribution_mono_gt_0:
    2.46 +  assumes gt_0: "0 < distribution X x"
    2.47 +  assumes *: "\<And>t. \<lbrakk> t \<in> space M ; X t \<in> x \<rbrakk> \<Longrightarrow> Y t \<in> y"
    2.48 +  shows "0 < distribution Y y"
    2.49 +  by (rule less_le_trans[OF gt_0 distribution_mono]) (rule *)
    2.50 +
    2.51 +lemma (in finite_prob_space) sum_over_space_distrib:
    2.52 +  "(\<Sum>x\<in>X`space M. distribution X {x}) = 1"
    2.53 +  unfolding distribution_def measure_space_1[symmetric] using finite_space
    2.54 +  by (subst measure_finitely_additive'')
    2.55 +     (auto simp add: disjoint_family_on_def sets_eq_Pow intro!: arg_cong[where f=\<mu>])
    2.56 +
    2.57 +lemma (in finite_prob_space) sum_over_space_real_distribution:
    2.58 +  "(\<Sum>x\<in>X`space M. real (distribution X {x})) = 1"
    2.59 +  unfolding distribution_def prob_space[symmetric] using finite_space
    2.60 +  by (subst real_finite_measure_finite_Union[symmetric])
    2.61 +     (auto simp add: disjoint_family_on_def sets_eq_Pow intro!: arg_cong[where f=prob])
    2.62 +
    2.63 +lemma (in finite_prob_space) finite_sum_over_space_eq_1:
    2.64 +  "(\<Sum>x\<in>space M. real (\<mu> {x})) = 1"
    2.65 +  using sum_over_space_eq_1 finite_measure by (simp add: real_of_pinfreal_setsum sets_eq_Pow)
    2.66 +
    2.67 +lemma (in finite_prob_space) distribution_finite:
    2.68 +  "distribution X A \<noteq> \<omega>"
    2.69 +  using finite_measure[of "X -` A \<inter> space M"]
    2.70 +  unfolding distribution_def sets_eq_Pow by auto
    2.71 +
    2.72 +lemma (in finite_prob_space) real_distribution_gt_0[simp]:
    2.73 +  "0 < real (distribution Y y) \<longleftrightarrow>  0 < distribution Y y"
    2.74 +  using assms by (auto intro!: real_pinfreal_pos distribution_finite)
    2.75 +
    2.76 +lemma (in finite_prob_space) real_distribution_mult_pos_pos:
    2.77 +  assumes "0 < distribution Y y"
    2.78 +  and "0 < distribution X x"
    2.79 +  shows "0 < real (distribution Y y * distribution X x)"
    2.80 +  unfolding real_of_pinfreal_mult[symmetric]
    2.81 +  using assms by (auto intro!: mult_pos_pos)
    2.82 +
    2.83 +lemma (in finite_prob_space) real_distribution_divide_pos_pos:
    2.84 +  assumes "0 < distribution Y y"
    2.85 +  and "0 < distribution X x"
    2.86 +  shows "0 < real (distribution Y y / distribution X x)"
    2.87 +  unfolding divide_pinfreal_def real_of_pinfreal_mult[symmetric]
    2.88 +  using assms distribution_finite[of X x] by (cases "distribution X x") (auto intro!: mult_pos_pos)
    2.89 +
    2.90 +lemma (in finite_prob_space) real_distribution_mult_inverse_pos_pos:
    2.91 +  assumes "0 < distribution Y y"
    2.92 +  and "0 < distribution X x"
    2.93 +  shows "0 < real (distribution Y y * inverse (distribution X x))"
    2.94 +  unfolding divide_pinfreal_def real_of_pinfreal_mult[symmetric]
    2.95 +  using assms distribution_finite[of X x] by (cases "distribution X x") (auto intro!: mult_pos_pos)
    2.96 +
    2.97 +lemma (in prob_space) distribution_remove_const:
    2.98 +  shows "joint_distribution X (\<lambda>x. ()) {(x, ())} = distribution X {x}"
    2.99 +  and "joint_distribution (\<lambda>x. ()) X {((), x)} = distribution X {x}"
   2.100 +  and "joint_distribution X (\<lambda>x. (Y x, ())) {(x, y, ())} = joint_distribution X Y {(x, y)}"
   2.101 +  and "joint_distribution X (\<lambda>x. ((), Y x)) {(x, (), y)} = joint_distribution X Y {(x, y)}"
   2.102 +  and "distribution (\<lambda>x. ()) {()} = 1"
   2.103 +  unfolding measure_space_1[symmetric]
   2.104 +  by (auto intro!: arg_cong[where f="\<mu>"] simp: distribution_def)
   2.105  
   2.106 -lemma (in finite_prob_space) finite_product_prob_space_of_images:
   2.107 -  "finite_prob_space \<lparr> space = X ` space M \<times> Y ` space M, sets = Pow (X ` space M \<times> Y ` space M)\<rparr>
   2.108 -                     (joint_distribution X Y)"
   2.109 -  (is "finite_prob_space ?S _")
   2.110 -proof (simp add: finite_prob_space_eq finite_product_measure_space_of_images)
   2.111 -  have "X -` X ` space M \<inter> Y -` Y ` space M \<inter> space M = space M" by auto
   2.112 -  thus "joint_distribution X Y (X ` space M \<times> Y ` space M) = 1"
   2.113 -    by (simp add: distribution_def prob_space vimage_Times comp_def measure_space_1)
   2.114 +lemma (in finite_prob_space) setsum_distribution_gen:
   2.115 +  assumes "Z -` {c} \<inter> space M = (\<Union>x \<in> X`space M. Y -` {f x}) \<inter> space M"
   2.116 +  and "inj_on f (X`space M)"
   2.117 +  shows "(\<Sum>x \<in> X`space M. distribution Y {f x}) = distribution Z {c}"
   2.118 +  unfolding distribution_def assms
   2.119 +  using finite_space assms
   2.120 +  by (subst measure_finitely_additive'')
   2.121 +     (auto simp add: disjoint_family_on_def sets_eq_Pow inj_on_def
   2.122 +      intro!: arg_cong[where f=prob])
   2.123 +
   2.124 +lemma (in finite_prob_space) setsum_distribution:
   2.125 +  "(\<Sum>x \<in> X`space M. joint_distribution X Y {(x, y)}) = distribution Y {y}"
   2.126 +  "(\<Sum>y \<in> Y`space M. joint_distribution X Y {(x, y)}) = distribution X {x}"
   2.127 +  "(\<Sum>x \<in> X`space M. joint_distribution X (\<lambda>x. (Y x, Z x)) {(x, y, z)}) = joint_distribution Y Z {(y, z)}"
   2.128 +  "(\<Sum>y \<in> Y`space M. joint_distribution X (\<lambda>x. (Y x, Z x)) {(x, y, z)}) = joint_distribution X Z {(x, z)}"
   2.129 +  "(\<Sum>z \<in> Z`space M. joint_distribution X (\<lambda>x. (Y x, Z x)) {(x, y, z)}) = joint_distribution X Y {(x, y)}"
   2.130 +  by (auto intro!: inj_onI setsum_distribution_gen)
   2.131 +
   2.132 +lemma (in finite_prob_space) setsum_real_distribution_gen:
   2.133 +  assumes "Z -` {c} \<inter> space M = (\<Union>x \<in> X`space M. Y -` {f x}) \<inter> space M"
   2.134 +  and "inj_on f (X`space M)"
   2.135 +  shows "(\<Sum>x \<in> X`space M. real (distribution Y {f x})) = real (distribution Z {c})"
   2.136 +  unfolding distribution_def assms
   2.137 +  using finite_space assms
   2.138 +  by (subst real_finite_measure_finite_Union[symmetric])
   2.139 +     (auto simp add: disjoint_family_on_def sets_eq_Pow inj_on_def
   2.140 +        intro!: arg_cong[where f=prob])
   2.141 +
   2.142 +lemma (in finite_prob_space) setsum_real_distribution:
   2.143 +  "(\<Sum>x \<in> X`space M. real (joint_distribution X Y {(x, y)})) = real (distribution Y {y})"
   2.144 +  "(\<Sum>y \<in> Y`space M. real (joint_distribution X Y {(x, y)})) = real (distribution X {x})"
   2.145 +  "(\<Sum>x \<in> X`space M. real (joint_distribution X (\<lambda>x. (Y x, Z x)) {(x, y, z)})) = real (joint_distribution Y Z {(y, z)})"
   2.146 +  "(\<Sum>y \<in> Y`space M. real (joint_distribution X (\<lambda>x. (Y x, Z x)) {(x, y, z)})) = real (joint_distribution X Z {(x, z)})"
   2.147 +  "(\<Sum>z \<in> Z`space M. real (joint_distribution X (\<lambda>x. (Y x, Z x)) {(x, y, z)})) = real (joint_distribution X Y {(x, y)})"
   2.148 +  by (auto intro!: inj_onI setsum_real_distribution_gen)
   2.149 +
   2.150 +lemma (in finite_prob_space) real_distribution_order:
   2.151 +  shows "r \<le> real (joint_distribution X Y {(x, y)}) \<Longrightarrow> r \<le> real (distribution X {x})"
   2.152 +  and "r \<le> real (joint_distribution X Y {(x, y)}) \<Longrightarrow> r \<le> real (distribution Y {y})"
   2.153 +  and "r < real (joint_distribution X Y {(x, y)}) \<Longrightarrow> r < real (distribution X {x})"
   2.154 +  and "r < real (joint_distribution X Y {(x, y)}) \<Longrightarrow> r < real (distribution Y {y})"
   2.155 +  and "distribution X {x} = 0 \<Longrightarrow> real (joint_distribution X Y {(x, y)}) = 0"
   2.156 +  and "distribution Y {y} = 0 \<Longrightarrow> real (joint_distribution X Y {(x, y)}) = 0"
   2.157 +  using real_of_pinfreal_mono[OF distribution_finite joint_distribution_restriction_fst, of X Y "{(x, y)}"]
   2.158 +  using real_of_pinfreal_mono[OF distribution_finite joint_distribution_restriction_snd, of X Y "{(x, y)}"]
   2.159 +  using real_pinfreal_nonneg[of "joint_distribution X Y {(x, y)}"]
   2.160 +  by auto
   2.161 +
   2.162 +lemma (in prob_space) joint_distribution_remove[simp]:
   2.163 +    "joint_distribution X X {(x, x)} = distribution X {x}"
   2.164 +  unfolding distribution_def by (auto intro!: arg_cong[where f="\<mu>"])
   2.165 +
   2.166 +lemma (in finite_prob_space) distribution_1:
   2.167 +  "distribution X A \<le> 1"
   2.168 +  unfolding distribution_def measure_space_1[symmetric]
   2.169 +  by (auto intro!: measure_mono simp: sets_eq_Pow)
   2.170 +
   2.171 +lemma (in finite_prob_space) real_distribution_1:
   2.172 +  "real (distribution X A) \<le> 1"
   2.173 +  unfolding real_pinfreal_1[symmetric]
   2.174 +  by (rule real_of_pinfreal_mono[OF _ distribution_1]) simp
   2.175 +
   2.176 +lemma (in finite_prob_space) uniform_prob:
   2.177 +  assumes "x \<in> space M"
   2.178 +  assumes "\<And> x y. \<lbrakk>x \<in> space M ; y \<in> space M\<rbrakk> \<Longrightarrow> prob {x} = prob {y}"
   2.179 +  shows "prob {x} = 1 / real (card (space M))"
   2.180 +proof -
   2.181 +  have prob_x: "\<And> y. y \<in> space M \<Longrightarrow> prob {y} = prob {x}"
   2.182 +    using assms(2)[OF _ `x \<in> space M`] by blast
   2.183 +  have "1 = prob (space M)"
   2.184 +    using prob_space by auto
   2.185 +  also have "\<dots> = (\<Sum> x \<in> space M. prob {x})"
   2.186 +    using real_finite_measure_finite_Union[of "space M" "\<lambda> x. {x}", simplified]
   2.187 +      sets_eq_Pow inj_singleton[unfolded inj_on_def, rule_format]
   2.188 +      finite_space unfolding disjoint_family_on_def  prob_space[symmetric]
   2.189 +    by (auto simp add:setsum_restrict_set)
   2.190 +  also have "\<dots> = (\<Sum> y \<in> space M. prob {x})"
   2.191 +    using prob_x by auto
   2.192 +  also have "\<dots> = real_of_nat (card (space M)) * prob {x}" by simp
   2.193 +  finally have one: "1 = real (card (space M)) * prob {x}"
   2.194 +    using real_eq_of_nat by auto
   2.195 +  hence two: "real (card (space M)) \<noteq> 0" by fastsimp 
   2.196 +  from one have three: "prob {x} \<noteq> 0" by fastsimp
   2.197 +  thus ?thesis using one two three divide_cancel_right
   2.198 +    by (auto simp:field_simps)
   2.199  qed
   2.200  
   2.201  lemma (in prob_space) prob_space_subalgebra:
   2.202 @@ -382,70 +533,54 @@
   2.203  qed
   2.204  
   2.205  lemma (in finite_prob_space) finite_measure_space:
   2.206 +  fixes X :: "'a \<Rightarrow> 'x"
   2.207    shows "finite_measure_space \<lparr>space = X ` space M, sets = Pow (X ` space M)\<rparr> (distribution X)"
   2.208      (is "finite_measure_space ?S _")
   2.209  proof (rule finite_measure_spaceI, simp_all)
   2.210    show "finite (X ` space M)" using finite_space by simp
   2.211 -
   2.212 -  show "positive (distribution X)"
   2.213 -    unfolding distribution_def positive_def using sets_eq_Pow by auto
   2.214 -
   2.215 -  show "additive ?S (distribution X)" unfolding additive_def distribution_def
   2.216 -  proof (simp, safe)
   2.217 -    fix x y
   2.218 -    have x: "(X -` x) \<inter> space M \<in> sets M"
   2.219 -      and y: "(X -` y) \<inter> space M \<in> sets M" using sets_eq_Pow by auto
   2.220 -    assume "x \<inter> y = {}"
   2.221 -    hence "X -` x \<inter> space M \<inter> (X -` y \<inter> space M) = {}" by auto
   2.222 -    from additive[unfolded additive_def, rule_format, OF x y] this
   2.223 -      finite_measure[OF x] finite_measure[OF y]
   2.224 -    have "\<mu> (((X -` x) \<union> (X -` y)) \<inter> space M) =
   2.225 -      \<mu> ((X -` x) \<inter> space M) + \<mu> ((X -` y) \<inter> space M)"
   2.226 -      by (subst Int_Un_distrib2) auto
   2.227 -    thus "\<mu> ((X -` x \<union> X -` y) \<inter> space M) = \<mu> (X -` x \<inter> space M) + \<mu> (X -` y \<inter> space M)"
   2.228 -      by auto
   2.229 -  qed
   2.230 -
   2.231 -  { fix x assume "x \<in> X ` space M" thus "distribution X {x} \<noteq> \<omega>"
   2.232 -    unfolding distribution_def by (auto intro!: finite_measure simp: sets_eq_Pow) }
   2.233 +next
   2.234 +  fix A B :: "'x set" assume "A \<inter> B = {}"
   2.235 +  then show "distribution X (A \<union> B) = distribution X A + distribution X B"
   2.236 +    unfolding distribution_def
   2.237 +    by (subst measure_additive)
   2.238 +       (auto intro!: arg_cong[where f=\<mu>] simp: sets_eq_Pow)
   2.239  qed
   2.240  
   2.241 +lemma (in finite_prob_space) finite_prob_space_of_images:
   2.242 +  "finite_prob_space \<lparr> space = X ` space M, sets = Pow (X ` space M)\<rparr> (distribution X)"
   2.243 +  by (simp add: finite_prob_space_eq finite_measure_space)
   2.244 +
   2.245 +lemma (in prob_space) joint_distribution_commute:
   2.246 +  "joint_distribution X Y x = joint_distribution Y X ((\<lambda>(x,y). (y,x))`x)"
   2.247 +  unfolding distribution_def by (auto intro!: arg_cong[where f=\<mu>])
   2.248 +
   2.249 +lemma (in finite_prob_space) real_distribution_order':
   2.250 +  shows "real (distribution X {x}) = 0 \<Longrightarrow> real (joint_distribution X Y {(x, y)}) = 0"
   2.251 +  and "real (distribution Y {y}) = 0 \<Longrightarrow> real (joint_distribution X Y {(x, y)}) = 0"
   2.252 +  using real_of_pinfreal_mono[OF distribution_finite joint_distribution_restriction_fst, of X Y "{(x, y)}"]
   2.253 +  using real_of_pinfreal_mono[OF distribution_finite joint_distribution_restriction_snd, of X Y "{(x, y)}"]
   2.254 +  using real_pinfreal_nonneg[of "joint_distribution X Y {(x, y)}"]
   2.255 +  by auto
   2.256 +
   2.257  lemma (in finite_prob_space) finite_product_measure_space:
   2.258 +  fixes X :: "'a \<Rightarrow> 'x" and Y :: "'a \<Rightarrow> 'y"
   2.259    assumes "finite s1" "finite s2"
   2.260    shows "finite_measure_space \<lparr> space = s1 \<times> s2, sets = Pow (s1 \<times> s2)\<rparr> (joint_distribution X Y)"
   2.261      (is "finite_measure_space ?M ?D")
   2.262 -proof (rule finite_Pow_additivity_sufficient)
   2.263 -  show "positive ?D"
   2.264 -    unfolding positive_def using assms sets_eq_Pow
   2.265 -    by (simp add: distribution_def)
   2.266 -
   2.267 -  show "additive ?M ?D" unfolding additive_def
   2.268 -  proof safe
   2.269 -    fix x y
   2.270 -    have A: "((\<lambda>x. (X x, Y x)) -` x) \<inter> space M \<in> sets M" using assms sets_eq_Pow by auto
   2.271 -    have B: "((\<lambda>x. (X x, Y x)) -` y) \<inter> space M \<in> sets M" using assms sets_eq_Pow by auto
   2.272 -    assume "x \<inter> y = {}"
   2.273 -    hence "(\<lambda>x. (X x, Y x)) -` x \<inter> space M \<inter> ((\<lambda>x. (X x, Y x)) -` y \<inter> space M) = {}"
   2.274 -      by auto
   2.275 -    from additive[unfolded additive_def, rule_format, OF A B] this
   2.276 -      finite_measure[OF A] finite_measure[OF B]
   2.277 -    show "?D (x \<union> y) = ?D x + ?D y"
   2.278 -      apply (simp add: distribution_def)
   2.279 -      apply (subst Int_Un_distrib2)
   2.280 -      by (auto simp: real_of_pinfreal_add)
   2.281 -  qed
   2.282 -
   2.283 -  show "finite (space ?M)"
   2.284 +proof (rule finite_measure_spaceI, simp_all)
   2.285 +  show "finite (s1 \<times> s2)"
   2.286      using assms by auto
   2.287 -
   2.288 -  show "sets ?M = Pow (space ?M)"
   2.289 -    by simp
   2.290 -
   2.291 -  { fix x assume "x \<in> space ?M" thus "?D {x} \<noteq> \<omega>"
   2.292 -    unfolding distribution_def by (auto intro!: finite_measure simp: sets_eq_Pow) }
   2.293 +  show "joint_distribution X Y (s1\<times>s2) \<noteq> \<omega>"
   2.294 +    using distribution_finite .
   2.295 +next
   2.296 +  fix A B :: "('x*'y) set" assume "A \<inter> B = {}"
   2.297 +  then show "joint_distribution X Y (A \<union> B) = joint_distribution X Y A + joint_distribution X Y B"
   2.298 +    unfolding distribution_def
   2.299 +    by (subst measure_additive)
   2.300 +       (auto intro!: arg_cong[where f=\<mu>] simp: sets_eq_Pow)
   2.301  qed
   2.302  
   2.303 -lemma (in finite_measure_space) finite_product_measure_space_of_images:
   2.304 +lemma (in finite_prob_space) finite_product_measure_space_of_images:
   2.305    shows "finite_measure_space \<lparr> space = X ` space M \<times> Y ` space M,
   2.306                                  sets = Pow (X ` space M \<times> Y ` space M) \<rparr>
   2.307                                (joint_distribution X Y)"
     3.1 --- a/src/HOL/Probability/Product_Measure.thy	Thu Sep 02 17:28:00 2010 +0200
     3.2 +++ b/src/HOL/Probability/Product_Measure.thy	Thu Sep 02 19:51:53 2010 +0200
     3.3 @@ -315,7 +315,7 @@
     3.4    from subst[OF this, of "\<lambda> M. A \<in> sets M", OF A]
     3.5    show ?thesis by auto
     3.6  qed
     3.7 -
     3.8 +(*
     3.9  lemma
    3.10    assumes sfin: "range A \<subseteq> sets M" "(\<Union>i. A i) = space M" "\<And> i :: nat. \<nu> (A i) < \<omega>"
    3.11    assumes A: "\<And>  i. \<mu> (A i) = \<nu> (A i)" "\<And> i. A i \<subseteq> A (Suc i)"
    3.12 @@ -345,7 +345,7 @@
    3.13        apply (auto simp add:image_def) (* TODO *) sorry
    3.14      show ?thesis sorry
    3.15  qed
    3.16 -
    3.17 +*)
    3.18  definition prod_sets where
    3.19    "prod_sets A B = {z. \<exists>x \<in> A. \<exists>y \<in> B. z = x \<times> y}"
    3.20  
    3.21 @@ -512,36 +512,25 @@
    3.22  qed
    3.23  
    3.24  lemma (in finite_measure_space) finite_measure_space_finite_prod_measure:
    3.25 -  assumes "finite_measure_space N \<nu>"
    3.26 +  fixes N :: "('c, 'd) algebra_scheme"
    3.27 +  assumes N: "finite_measure_space N \<nu>"
    3.28    shows "finite_measure_space (prod_measure_space M N) (prod_measure M \<mu> N \<nu>)"
    3.29    unfolding finite_prod_measure_space[OF assms]
    3.30 -proof (rule finite_measure_spaceI)
    3.31 +proof (rule finite_measure_spaceI, simp_all)
    3.32    interpret N: finite_measure_space N \<nu> by fact
    3.33 -
    3.34 -  let ?P = "\<lparr>space = space M \<times> space N, sets = Pow (space M \<times> space N)\<rparr>"
    3.35 -  show "measure_space ?P (prod_measure M \<mu> N \<nu>)"
    3.36 -  proof (rule sigma_algebra.finite_additivity_sufficient)
    3.37 -    show "sigma_algebra ?P" by (rule sigma_algebra_Pow)
    3.38 -    show "finite (space ?P)" using finite_space N.finite_space by auto
    3.39 -    from finite_prod_measure_times[OF assms, of "{}" "{}"]
    3.40 -    show "positive (prod_measure M \<mu> N \<nu>)"
    3.41 -      unfolding positive_def by simp
    3.42 +  show "finite (space M \<times> space N)" using finite_space N.finite_space by auto
    3.43 +  show "prod_measure M \<mu> N \<nu> (space M \<times> space N) \<noteq> \<omega>"
    3.44 +    using finite_prod_measure_times[OF N top N.top] by simp
    3.45 +  show "prod_measure M \<mu> N \<nu> {} = 0"
    3.46 +    using finite_prod_measure_times[OF N empty_sets N.empty_sets] by simp
    3.47  
    3.48 -    show "additive ?P (prod_measure M \<mu> N \<nu>)"
    3.49 -      unfolding additive_def
    3.50 -      apply (auto simp add: sets_eq_Pow prod_measure_def positive_integral_add[symmetric]
    3.51 -                  intro!: positive_integral_cong)
    3.52 -      apply (subst N.measure_additive[symmetric])
    3.53 -      by (auto simp: N.sets_eq_Pow sets_eq_Pow)
    3.54 -  qed
    3.55 -  show "finite (space ?P)" using finite_space N.finite_space by auto
    3.56 -  show "sets ?P = Pow (space ?P)" by simp
    3.57 -
    3.58 -  fix x assume "x \<in> space ?P"
    3.59 -  with finite_prod_measure_times[OF assms, of "{fst x}" "{snd x}"]
    3.60 -    finite_measure[of "{fst x}"] N.finite_measure[of "{snd x}"]
    3.61 -  show "prod_measure M \<mu> N \<nu> {x} \<noteq> \<omega>"
    3.62 -    by (auto simp add: sets_eq_Pow N.sets_eq_Pow elim!: SigmaE)
    3.63 +  fix A B :: "('a * 'c) set" assume "A \<inter> B = {}" "A \<subseteq> space M \<times> space N" "B \<subseteq> space M \<times> space N"
    3.64 +  then show "prod_measure M \<mu> N \<nu> (A \<union> B) = prod_measure M \<mu> N \<nu> A + prod_measure M \<mu> N \<nu> B"
    3.65 +    apply (auto simp add: sets_eq_Pow prod_measure_def positive_integral_add[symmetric]
    3.66 +                intro!: positive_integral_cong)
    3.67 +    apply (subst N.measure_additive)
    3.68 +    apply (auto intro!: arg_cong[where f=\<mu>] simp: N.sets_eq_Pow sets_eq_Pow)
    3.69 +    done
    3.70  qed
    3.71  
    3.72  lemma (in finite_measure_space) finite_measure_space_finite_prod_measure_alterantive:
    3.73 @@ -551,4 +540,18 @@
    3.74    unfolding finite_prod_measure_space[OF N, symmetric]
    3.75    using finite_measure_space_finite_prod_measure[OF N] .
    3.76  
    3.77 +lemma prod_measure_times_finite:
    3.78 +  assumes fms: "finite_measure_space M \<mu>" "finite_measure_space N \<nu>" and a: "a \<in> space M \<times> space N"
    3.79 +  shows "prod_measure M \<mu> N \<nu> {a} = \<mu> {fst a} * \<nu> {snd a}"
    3.80 +proof (cases a)
    3.81 +  case (Pair b c)
    3.82 +  hence a_eq: "{a} = {b} \<times> {c}" by simp
    3.83 +
    3.84 +  interpret M: finite_measure_space M \<mu> by fact
    3.85 +  interpret N: finite_measure_space N \<nu> by fact
    3.86 +
    3.87 +  from finite_measure_space.finite_prod_measure_times[OF fms, of "{b}" "{c}"] M.sets_eq_Pow N.sets_eq_Pow a Pair
    3.88 +  show ?thesis unfolding a_eq by simp
    3.89 +qed
    3.90 +
    3.91  end
     4.1 --- a/src/HOL/Probability/Radon_Nikodym.thy	Thu Sep 02 17:28:00 2010 +0200
     4.2 +++ b/src/HOL/Probability/Radon_Nikodym.thy	Thu Sep 02 19:51:53 2010 +0200
     4.3 @@ -64,6 +64,30 @@
     4.4  definition (in measure_space)
     4.5    "absolutely_continuous \<nu> = (\<forall>N\<in>null_sets. \<nu> N = (0 :: pinfreal))"
     4.6  
     4.7 +lemma (in finite_measure_space) absolutely_continuousI:
     4.8 +  assumes "finite_measure_space M \<nu>"
     4.9 +  assumes v: "\<And>x. \<lbrakk> x \<in> space M ; \<mu> {x} = 0 \<rbrakk> \<Longrightarrow> \<nu> {x} = 0"
    4.10 +  shows "absolutely_continuous \<nu>"
    4.11 +proof (unfold absolutely_continuous_def sets_eq_Pow, safe)
    4.12 +  fix N assume "\<mu> N = 0" "N \<subseteq> space M"
    4.13 +  interpret v: finite_measure_space M \<nu> by fact
    4.14 +  have "\<nu> N = \<nu> (\<Union>x\<in>N. {x})" by simp
    4.15 +  also have "\<dots> = (\<Sum>x\<in>N. \<nu> {x})"
    4.16 +  proof (rule v.measure_finitely_additive''[symmetric])
    4.17 +    show "finite N" using `N \<subseteq> space M` finite_space by (auto intro: finite_subset)
    4.18 +    show "disjoint_family_on (\<lambda>i. {i}) N" unfolding disjoint_family_on_def by auto
    4.19 +    fix x assume "x \<in> N" thus "{x} \<in> sets M" using `N \<subseteq> space M` sets_eq_Pow by auto
    4.20 +  qed
    4.21 +  also have "\<dots> = 0"
    4.22 +  proof (safe intro!: setsum_0')
    4.23 +    fix x assume "x \<in> N"
    4.24 +    hence "\<mu> {x} \<le> \<mu> N" using sets_eq_Pow `N \<subseteq> space M` by (auto intro!: measure_mono)
    4.25 +    hence "\<mu> {x} = 0" using `\<mu> N = 0` by simp
    4.26 +    thus "\<nu> {x} = 0" using v[of x] `x \<in> N` `N \<subseteq> space M` by auto
    4.27 +  qed
    4.28 +  finally show "\<nu> N = 0" .
    4.29 +qed
    4.30 +
    4.31  lemma (in finite_measure) Radon_Nikodym_aux_epsilon:
    4.32    fixes e :: real assumes "0 < e"
    4.33    assumes "finite_measure M s"