# HG changeset patch # User hoelzl # Date 1307621074 -7200 # Node ID 60e181c4eae4d6e43eb0a415fb428a4879d067a7 # Parent 9ba256ad678191558fae2f275f68fea03778a873 lemma: independence is equal to mutual information = 0 diff -r 9ba256ad6781 -r 60e181c4eae4 src/HOL/Probability/Independent_Family.thy --- a/src/HOL/Probability/Independent_Family.thy Thu Jun 09 13:55:11 2011 +0200 +++ b/src/HOL/Probability/Independent_Family.thy Thu Jun 09 14:04:34 2011 +0200 @@ -117,6 +117,16 @@ using indep[unfolded indep_set_def, THEN indep_setsD, of UNIV "bool_case a b"] ev by (simp add: ac_simps UNIV_bool) +lemma (in prob_space) indep_var_eq: + "indep_var S X T Y \ + (random_variable S X \ random_variable T Y) \ + indep_set + (sigma_sets (space M) { X -` A \ space M | A. A \ sets S}) + (sigma_sets (space M) { Y -` A \ space M | A. A \ sets T})" + unfolding indep_var_def indep_vars_def indep_set_def UNIV_bool + by (intro arg_cong2[where f="op \"] arg_cong2[where f=indep_sets] ext) + (auto split: bool.split) + lemma (in prob_space) assumes indep: "indep_set A B" shows indep_setD_ev1: "A \ events" @@ -491,7 +501,7 @@ proof (simp add: sigma_algebra_iff2, safe) let ?A = "(\n. sigma_sets (space M) (UNION {n..} A))" interpret A: sigma_algebra "\space = space M, sets = A i\" for i by fact - { fix X x assume "X \ ?A" "x \ X" + { fix X x assume "X \ ?A" "x \ X" then have "\n. X \ sigma_sets (space M) (UNION {n..} A)" by auto from this[of 0] have "X \ sigma_sets (space M) (UNION UNIV A)" by simp then have "X \ space M" @@ -572,7 +582,7 @@ show "Int_stable \space = space M, sets = A m\" unfolding Int_stable_def using A.Int by auto qed - also have "(\b. sigma_sets (space M) (\m\bool_case {..n} {Suc n..} b. A m)) = + also have "(\b. sigma_sets (space M) (\m\bool_case {..n} {Suc n..} b. A m)) = bool_case (sigma_sets (space M) (\m\{..n}. A m)) (sigma_sets (space M) (\m\{Suc n..}. A m))" by (auto intro!: ext split: bool.split) finally have indep: "indep_set (sigma_sets (space M) (\m\{..n}. A m)) (sigma_sets (space M) (\m\{Suc n..}. A m))" @@ -732,7 +742,7 @@ by (auto simp del: vimage_Int intro!: exI[of _ "A \ B"] dest: Int_stableD) qed } note indep_sets_sigma_sets_iff[OF this, simp] - + { fix i assume "i \ I" { fix A assume "A \ sets (M' i)" then have "A \ sets (sigma (M' i))" by (auto simp: sets_sigma intro: sigma_sets.Basic) @@ -745,7 +755,7 @@ "space M \ {X i -` A \ space M |A. A \ sets (M' i)}" by (auto intro!: exI[of _ "space (M' i)"]) } note indep_sets_finite[OF I this, simp] - + have "(\A\\ i\I. {X i -` A \ space M |A. A \ sets (M' i)}. prob (INTER I A) = (\j\I. prob (A j))) = (\A\\ i\I. sets (M' i). prob ((\j\I. X j -` A j) \ space M) = (\x\I. prob (X x -` A x \ space M)))" (is "?L = ?R") @@ -847,7 +857,7 @@ by (simp_all add: product_algebra_def) show "A \ sets (sigma P.G)" using `A \ sets P.P` by (simp add: product_algebra_def) - + fix E assume E: "E \ sets P.G" then have "E \ sets P.P" by (simp add: sets_sigma sigma_sets.Basic product_algebra_def) @@ -915,10 +925,67 @@ finally show ?thesis . qed +lemma (in prob_space) + assumes "indep_var S X T Y" + shows indep_var_rv1: "random_variable S X" + and indep_var_rv2: "random_variable T Y" +proof - + have "\i\UNIV. random_variable (bool_case S T i) (bool_case X Y i)" + using assms unfolding indep_var_def indep_vars_def by auto + then show "random_variable S X" "random_variable T Y" + unfolding UNIV_bool by auto +qed + lemma (in prob_space) indep_var_distributionD: - assumes "indep_var Ma A Mb B" - assumes "Xa \ sets Ma" "Xb \ sets Mb" - shows "joint_distribution A B (Xa \ Xb) = distribution A Xa * distribution B Xb" - unfolding distribution_def using assms by (rule indep_varD) + assumes indep: "indep_var S X T Y" + defines "P \ S\measure := extreal\distribution X\ \\<^isub>M T\measure := extreal\distribution Y\" + assumes "A \ sets P" + shows "joint_distribution X Y A = finite_measure.\' P A" +proof - + from indep have rvs: "random_variable S X" "random_variable T Y" + by (blast dest: indep_var_rv1 indep_var_rv2)+ + + let ?S = "S\measure := extreal\distribution X\" + let ?T = "T\measure := extreal\distribution Y\" + interpret X: prob_space ?S by (rule distribution_prob_space) fact + interpret Y: prob_space ?T by (rule distribution_prob_space) fact + interpret XY: pair_prob_space ?S ?T by default + + let ?J = "XY.P\ measure := extreal \ joint_distribution X Y \" + interpret J: prob_space ?J + by (rule joint_distribution_prob_space) (simp_all add: rvs) + + have "finite_measure.\' (XY.P\ measure := extreal \ joint_distribution X Y \) A = XY.\' A" + proof (rule prob_space_unique_Int_stable) + show "Int_stable (pair_measure_generator ?S ?T)" (is "Int_stable ?P") + by fact + show "space ?P \ sets ?P" + unfolding space_pair_measure[simplified pair_measure_def space_sigma] + using X.top Y.top by (auto intro!: pair_measure_generatorI) + + show "prob_space ?J" by default + show "space ?J = space ?P" + by (simp add: pair_measure_generator_def space_pair_measure) + show "sets ?J = sets (sigma ?P)" + by (simp add: pair_measure_def) + + show "prob_space XY.P" by default + show "space XY.P = space ?P" "sets XY.P = sets (sigma ?P)" + by (simp_all add: pair_measure_generator_def pair_measure_def) + + show "A \ sets (sigma ?P)" + using `A \ sets P` unfolding P_def pair_measure_def by simp + + fix X assume "X \ sets ?P" + then obtain A B where "A \ sets S" "B \ sets T" "X = A \ B" + by (auto simp: sets_pair_measure_generator) + then show "J.\' X = XY.\' X" + unfolding J.\'_def XY.\'_def using indep + by (simp add: XY.pair_measure_times) + (simp add: distribution_def indep_varD) + qed + then show ?thesis + using `A \ sets P` unfolding P_def J.\'_def XY.\'_def by simp +qed end diff -r 9ba256ad6781 -r 60e181c4eae4 src/HOL/Probability/Information.thy --- a/src/HOL/Probability/Information.thy Thu Jun 09 13:55:11 2011 +0200 +++ b/src/HOL/Probability/Information.thy Thu Jun 09 14:04:34 2011 +0200 @@ -7,14 +7,10 @@ theory Information imports - Probability_Measure + Independent_Family "~~/src/HOL/Library/Convex" begin -lemma (in prob_space) not_zero_less_distribution[simp]: - "(\ 0 < distribution X A) \ distribution X A = 0" - using distribution_positive[of X A] by arith - lemma log_le: "1 < a \ 0 < x \ x \ y \ log a x \ log a y" by (subst log_le_cancel_iff) auto @@ -175,7 +171,211 @@ Kullback$-$Leibler distance. *} definition - "KL_divergence b M \ = \x. log b (real (RN_deriv M \ x)) \M\measure := \\" + "entropy_density b M \ = log b \ real \ RN_deriv M \" + +definition + "KL_divergence b M \ = integral\<^isup>L (M\measure := \\) (entropy_density b M \)" + +lemma (in information_space) measurable_entropy_density: + assumes ps: "prob_space (M\measure := \\)" + assumes ac: "absolutely_continuous \" + shows "entropy_density b M \ \ borel_measurable M" +proof - + interpret \: prob_space "M\measure := \\" by fact + have "measure_space (M\measure := \\)" by fact + from RN_deriv[OF this ac] b_gt_1 show ?thesis + unfolding entropy_density_def + by (intro measurable_comp) auto +qed + +lemma (in information_space) KL_gt_0: + assumes ps: "prob_space (M\measure := \\)" + assumes ac: "absolutely_continuous \" + assumes int: "integrable (M\ measure := \ \) (entropy_density b M \)" + assumes A: "A \ sets M" "\ A \ \ A" + shows "0 < KL_divergence b M \" +proof - + interpret \: prob_space "M\measure := \\" by fact + have ms: "measure_space (M\measure := \\)" by default + have fms: "finite_measure (M\measure := \\)" by default + note RN = RN_deriv[OF ms ac] + + from real_RN_deriv[OF fms ac] guess D . note D = this + with absolutely_continuous_AE[OF ms] ac + have D\: "AE x in M\measure := \\. RN_deriv M \ x = extreal (D x)" + by auto + + def f \ "\x. if D x = 0 then 1 else 1 / D x" + with D have f_borel: "f \ borel_measurable M" + by (auto intro!: measurable_If) + + have "KL_divergence b M \ = 1 / ln b * (\ x. ln b * entropy_density b M \ x \M\measure := \\)" + unfolding KL_divergence_def using int b_gt_1 + by (simp add: integral_cmult) + + { fix A assume "A \ sets M" + with RN D have "\.\ A = (\\<^isup>+ x. extreal (D x) * indicator A x \M)" + by (auto intro!: positive_integral_cong_AE) } + note D_density = this + + have ln_entropy: "(\x. ln b * entropy_density b M \ x) \ borel_measurable M" + using measurable_entropy_density[OF ps ac] by auto + + have "integrable (M\measure := \\) (\x. ln b * entropy_density b M \ x)" + using int by auto + moreover have "integrable (M\measure := \\) (\x. ln b * entropy_density b M \ x) \ + integrable M (\x. D x * (ln b * entropy_density b M \ x))" + using D D_density ln_entropy + by (intro integral_translated_density) auto + ultimately have M_int: "integrable M (\x. D x * (ln b * entropy_density b M \ x))" + by simp + + have D_neg: "(\\<^isup>+ x. extreal (- D x) \M) = 0" + using D by (subst positive_integral_0_iff_AE) auto + + have "(\\<^isup>+ x. extreal (D x) \M) = \ (space M)" + using RN D by (auto intro!: positive_integral_cong_AE) + then have D_pos: "(\\<^isup>+ x. extreal (D x) \M) = 1" + using \.measure_space_1 by simp + + have "integrable M D" + using D_pos D_neg D by (auto simp: integrable_def) + + have "integral\<^isup>L M D = 1" + using D_pos D_neg by (auto simp: lebesgue_integral_def) + + let ?D_set = "{x\space M. D x \ 0}" + have [simp, intro]: "?D_set \ sets M" + using D by (auto intro: sets_Collect) + + have "0 \ 1 - \' ?D_set" + using prob_le_1 by (auto simp: field_simps) + also have "\ = (\ x. D x - indicator ?D_set x \M)" + using `integrable M D` `integral\<^isup>L M D = 1` + by (simp add: \'_def) + also have "\ < (\ x. D x * (ln b * entropy_density b M \ x) \M)" + proof (rule integral_less_AE) + show "integrable M (\x. D x - indicator ?D_set x)" + using `integrable M D` + by (intro integral_diff integral_indicator) auto + next + show "integrable M (\x. D x * (ln b * entropy_density b M \ x))" + by fact + next + show "\ {x\space M. D x \ 1 \ D x \ 0} \ 0" + proof + assume eq_0: "\ {x\space M. D x \ 1 \ D x \ 0} = 0" + then have disj: "AE x. D x = 1 \ D x = 0" + using D(1) by (auto intro!: AE_I[OF subset_refl] sets_Collect) + + have "\ {x\space M. D x = 1} = (\\<^isup>+ x. indicator {x\space M. D x = 1} x \M)" + using D(1) by auto + also have "\ = (\\<^isup>+ x. extreal (D x) * indicator {x\space M. D x \ 0} x \M)" + using disj by (auto intro!: positive_integral_cong_AE simp: indicator_def one_extreal_def) + also have "\ = \ {x\space M. D x \ 0}" + using D(1) D_density by auto + also have "\ = \ (space M)" + using D_density D(1) by (auto intro!: positive_integral_cong simp: indicator_def) + finally have "AE x. D x = 1" + using D(1) \.measure_space_1 by (intro AE_I_eq_1) auto + then have "(\\<^isup>+x. indicator A x\M) = (\\<^isup>+x. extreal (D x) * indicator A x\M)" + by (intro positive_integral_cong_AE) (auto simp: one_extreal_def[symmetric]) + also have "\ = \ A" + using `A \ sets M` D_density by simp + finally show False using `A \ sets M` `\ A \ \ A` by simp + qed + show "{x\space M. D x \ 1 \ D x \ 0} \ sets M" + using D(1) by (auto intro: sets_Collect) + + show "AE t. t \ {x\space M. D x \ 1 \ D x \ 0} \ + D t - indicator ?D_set t \ D t * (ln b * entropy_density b M \ t)" + using D(2) + proof (elim AE_mp, safe intro!: AE_I2) + fix t assume Dt: "t \ space M" "D t \ 1" "D t \ 0" + and RN: "RN_deriv M \ t = extreal (D t)" + and eq: "D t - indicator ?D_set t = D t * (ln b * entropy_density b M \ t)" + + have "D t - 1 = D t - indicator ?D_set t" + using Dt by simp + also note eq + also have "D t * (ln b * entropy_density b M \ t) = - D t * ln (1 / D t)" + using RN b_gt_1 `D t \ 0` `0 \ D t` + by (simp add: entropy_density_def log_def ln_div less_le) + finally have "ln (1 / D t) = 1 / D t - 1" + using `D t \ 0` by (auto simp: field_simps) + from ln_eq_minus_one[OF _ this] `D t \ 0` `0 \ D t` `D t \ 1` + show False by auto + qed + + show "AE t. D t - indicator ?D_set t \ D t * (ln b * entropy_density b M \ t)" + using D(2) + proof (elim AE_mp, intro AE_I2 impI) + fix t assume "t \ space M" and RN: "RN_deriv M \ t = extreal (D t)" + show "D t - indicator ?D_set t \ D t * (ln b * entropy_density b M \ t)" + proof cases + assume asm: "D t \ 0" + then have "0 < D t" using `0 \ D t` by auto + then have "0 < 1 / D t" by auto + have "D t - indicator ?D_set t \ - D t * (1 / D t - 1)" + using asm `t \ space M` by (simp add: field_simps) + also have "- D t * (1 / D t - 1) \ - D t * ln (1 / D t)" + using ln_le_minus_one `0 < 1 / D t` by (intro mult_left_mono_neg) auto + also have "\ = D t * (ln b * entropy_density b M \ t)" + using `0 < D t` RN b_gt_1 + by (simp_all add: log_def ln_div entropy_density_def) + finally show ?thesis by simp + qed simp + qed + qed + also have "\ = (\ x. ln b * entropy_density b M \ x \M\measure := \\)" + using D D_density ln_entropy + by (intro integral_translated_density[symmetric]) auto + also have "\ = ln b * (\ x. entropy_density b M \ x \M\measure := \\)" + using int by (rule \.integral_cmult) + finally show "0 < KL_divergence b M \" + using b_gt_1 by (auto simp: KL_divergence_def zero_less_mult_iff) +qed + +lemma (in sigma_finite_measure) KL_eq_0: + assumes eq: "\A\sets M. \ A = measure M A" + shows "KL_divergence b M \ = 0" +proof - + have "AE x. 1 = RN_deriv M \ x" + proof (rule RN_deriv_unique) + show "measure_space (M\measure := \\)" + using eq by (intro measure_space_cong) auto + show "absolutely_continuous \" + unfolding absolutely_continuous_def using eq by auto + show "(\x. 1) \ borel_measurable M" "AE x. 0 \ (1 :: extreal)" by auto + fix A assume "A \ sets M" + with eq show "\ A = \\<^isup>+ x. 1 * indicator A x \M" by simp + qed + then have "AE x. log b (real (RN_deriv M \ x)) = 0" + by (elim AE_mp) simp + from integral_cong_AE[OF this] + have "integral\<^isup>L M (entropy_density b M \) = 0" + by (simp add: entropy_density_def comp_def) + with eq show "KL_divergence b M \ = 0" + unfolding KL_divergence_def + by (subst integral_cong_measure) auto +qed + +lemma (in information_space) KL_eq_0_imp: + assumes ps: "prob_space (M\measure := \\)" + assumes ac: "absolutely_continuous \" + assumes int: "integrable (M\ measure := \ \) (entropy_density b M \)" + assumes KL: "KL_divergence b M \ = 0" + shows "\A\sets M. \ A = \ A" + by (metis less_imp_neq KL_gt_0 assms) + +lemma (in information_space) KL_ge_0: + assumes ps: "prob_space (M\measure := \\)" + assumes ac: "absolutely_continuous \" + assumes int: "integrable (M\ measure := \ \) (entropy_density b M \)" + shows "0 \ KL_divergence b M \" + using KL_eq_0 KL_gt_0[OF ps ac int] + by (cases "\A\sets M. \ A = measure M A") (auto simp: le_less) + lemma (in sigma_finite_measure) KL_divergence_vimage: assumes T: "T \ measure_preserving M M'" @@ -209,7 +409,7 @@ have AE: "AE x. RN_deriv M' \' (T x) = RN_deriv M \ x" by (rule RN_deriv_vimage[OF T T' inv \']) show ?thesis - unfolding KL_divergence_def + unfolding KL_divergence_def entropy_density_def comp_def proof (subst \'.integral_vimage[OF sa T']) show "(\x. log b (real (RN_deriv M \ x))) \ borel_measurable (M\measure := \\)" by (auto intro!: RN_deriv[OF M ac] borel_measurable_log[OF _ `1 < b`]) @@ -233,9 +433,9 @@ proof - interpret \: measure_space ?\ by fact have "KL_divergence b M \ = \x. log b (real (RN_deriv N \' x)) \?\" - by (simp cong: RN_deriv_cong \.integral_cong add: KL_divergence_def) + by (simp cong: RN_deriv_cong \.integral_cong add: KL_divergence_def entropy_density_def) also have "\ = KL_divergence b N \'" - by (auto intro!: \.integral_cong_measure[symmetric] simp: KL_divergence_def) + by (auto intro!: \.integral_cong_measure[symmetric] simp: KL_divergence_def entropy_density_def comp_def) finally show ?thesis . qed @@ -243,7 +443,7 @@ assumes v: "finite_measure_space (M\measure := \\)" assumes ac: "absolutely_continuous \" shows "KL_divergence b M \ = (\x\space M. real (\ {x}) * log b (real (\ {x}) / real (\ {x})))" (is "_ = ?sum") -proof (simp add: KL_divergence_def finite_measure_space.integral_finite_singleton[OF v]) +proof (simp add: KL_divergence_def finite_measure_space.integral_finite_singleton[OF v] entropy_density_def) interpret v: finite_measure_space "M\measure := \\" by fact have ms: "measure_space (M\measure := \\)" by default show "(\x \ space M. log b (real (RN_deriv M \ x)) * real (\ {x})) = ?sum" @@ -257,27 +457,10 @@ and "1 < b" shows "0 \ KL_divergence b M \" proof - + interpret information_space M by default fact interpret v: finite_prob_space "M\measure := \\" by fact - have ms: "finite_measure_space (M\measure := \\)" by default - - have "- (KL_divergence b M \) \ log b (\x\space M. real (\ {x}))" - proof (subst KL_divergence_eq_finite[OF ms ac], safe intro!: log_setsum_divide not_empty) - show "finite (space M)" using finite_space by simp - show "1 < b" by fact - show "(\x\space M. real (\ {x})) = 1" - using v.finite_sum_over_space_eq_1 by (simp add: v.\'_def) - - fix x assume "x \ space M" - then have x: "{x} \ sets M" unfolding sets_eq_Pow by auto - { assume "0 < real (\ {x})" - then have "\ {x} \ 0" by auto - then have "\ {x} \ 0" - using ac[unfolded absolutely_continuous_def, THEN bspec, of "{x}"] x by auto - thus "0 < real (\ {x})" using real_measure[OF x] by auto } - show "0 \ real (\ {x})" "0 \ real (\ {x})" - using real_measure[OF x] v.real_measure[of "{x}"] x by auto - qed - thus "0 \ KL_divergence b M \" using finite_sum_over_space_eq_1 by (simp add: \'_def) + have ps: "prob_space (M\measure := \\)" by default + from KL_ge_0[OF this ac v.integral_finite_singleton(1)] show ?thesis . qed subsection {* Mutual Information *} @@ -287,6 +470,163 @@ KL_divergence b (S\measure := extreal\distribution X\ \\<^isub>M T\measure := extreal\distribution Y\) (extreal\joint_distribution X Y)" +lemma (in information_space) + fixes S T X Y + defines "P \ S\measure := extreal\distribution X\ \\<^isub>M T\measure := extreal\distribution Y\" + shows "indep_var S X T Y \ + (random_variable S X \ random_variable T Y \ + measure_space.absolutely_continuous P (extreal\joint_distribution X Y) \ + integrable (P\measure := (extreal\joint_distribution X Y)\) + (entropy_density b P (extreal\joint_distribution X Y)) \ + mutual_information b S T X Y = 0)" +proof safe + assume indep: "indep_var S X T Y" + then have "random_variable S X" "random_variable T Y" + by (blast dest: indep_var_rv1 indep_var_rv2)+ + then show "sigma_algebra S" "X \ measurable M S" "sigma_algebra T" "Y \ measurable M T" + by blast+ + + interpret X: prob_space "S\measure := extreal\distribution X\" + by (rule distribution_prob_space) fact + interpret Y: prob_space "T\measure := extreal\distribution Y\" + by (rule distribution_prob_space) fact + interpret XY: pair_prob_space "S\measure := extreal\distribution X\" "T\measure := extreal\distribution Y\" by default + interpret XY: information_space XY.P b by default (rule b_gt_1) + + let ?J = "XY.P\ measure := (extreal\joint_distribution X Y) \" + { fix A assume "A \ sets XY.P" + then have "extreal (joint_distribution X Y A) = XY.\ A" + using indep_var_distributionD[OF indep] + by (simp add: XY.P.finite_measure_eq) } + note j_eq = this + + interpret J: prob_space ?J + using j_eq by (intro XY.prob_space_cong) auto + + have ac: "XY.absolutely_continuous (extreal\joint_distribution X Y)" + by (simp add: XY.absolutely_continuous_def j_eq) + then show "measure_space.absolutely_continuous P (extreal\joint_distribution X Y)" + unfolding P_def . + + have ed: "entropy_density b XY.P (extreal\joint_distribution X Y) \ borel_measurable XY.P" + by (rule XY.measurable_entropy_density) (default | fact)+ + + have "AE x in XY.P. 1 = RN_deriv XY.P (extreal\joint_distribution X Y) x" + proof (rule XY.RN_deriv_unique[OF _ ac]) + show "measure_space ?J" by default + fix A assume "A \ sets XY.P" + then show "(extreal\joint_distribution X Y) A = (\\<^isup>+ x. 1 * indicator A x \XY.P)" + by (simp add: j_eq) + qed (insert XY.measurable_const[of 1 borel], auto) + then have ae_XY: "AE x in XY.P. entropy_density b XY.P (extreal\joint_distribution X Y) x = 0" + by (elim XY.AE_mp) (simp add: entropy_density_def) + have ae_J: "AE x in ?J. entropy_density b XY.P (extreal\joint_distribution X Y) x = 0" + proof (rule XY.absolutely_continuous_AE) + show "measure_space ?J" by default + show "XY.absolutely_continuous (measure ?J)" + using ac by simp + qed (insert ae_XY, simp_all) + then show "integrable (P\measure := (extreal\joint_distribution X Y)\) + (entropy_density b P (extreal\joint_distribution X Y))" + unfolding P_def + using ed XY.measurable_const[of 0 borel] + by (subst J.integrable_cong_AE) auto + + show "mutual_information b S T X Y = 0" + unfolding mutual_information_def KL_divergence_def P_def + by (subst J.integral_cong_AE[OF ae_J]) simp +next + assume "sigma_algebra S" "X \ measurable M S" "sigma_algebra T" "Y \ measurable M T" + then have rvs: "random_variable S X" "random_variable T Y" by blast+ + + interpret X: prob_space "S\measure := extreal\distribution X\" + by (rule distribution_prob_space) fact + interpret Y: prob_space "T\measure := extreal\distribution Y\" + by (rule distribution_prob_space) fact + interpret XY: pair_prob_space "S\measure := extreal\distribution X\" "T\measure := extreal\distribution Y\" by default + interpret XY: information_space XY.P b by default (rule b_gt_1) + + let ?J = "XY.P\ measure := (extreal\joint_distribution X Y) \" + interpret J: prob_space ?J + using rvs by (intro joint_distribution_prob_space) auto + + assume ac: "measure_space.absolutely_continuous P (extreal\joint_distribution X Y)" + assume int: "integrable (P\measure := (extreal\joint_distribution X Y)\) + (entropy_density b P (extreal\joint_distribution X Y))" + assume I_eq_0: "mutual_information b S T X Y = 0" + + have eq: "\A\sets XY.P. (extreal \ joint_distribution X Y) A = XY.\ A" + proof (rule XY.KL_eq_0_imp) + show "prob_space ?J" by default + show "XY.absolutely_continuous (extreal\joint_distribution X Y)" + using ac by (simp add: P_def) + show "integrable ?J (entropy_density b XY.P (extreal\joint_distribution X Y))" + using int by (simp add: P_def) + show "KL_divergence b XY.P (extreal\joint_distribution X Y) = 0" + using I_eq_0 unfolding mutual_information_def by (simp add: P_def) + qed + + { fix S X assume "sigma_algebra S" + interpret S: sigma_algebra S by fact + have "Int_stable \space = space M, sets = {X -` A \ space M |A. A \ sets S}\" + proof (safe intro!: Int_stableI) + fix A B assume "A \ sets S" "B \ sets S" + then show "\C. (X -` A \ space M) \ (X -` B \ space M) = (X -` C \ space M) \ C \ sets S" + by (intro exI[of _ "A \ B"]) auto + qed } + note Int_stable = this + + show "indep_var S X T Y" unfolding indep_var_eq + proof (intro conjI indep_set_sigma_sets Int_stable) + show "indep_set {X -` A \ space M |A. A \ sets S} {Y -` A \ space M |A. A \ sets T}" + proof (safe intro!: indep_setI) + { fix A assume "A \ sets S" then show "X -` A \ space M \ sets M" + using `X \ measurable M S` by (auto intro: measurable_sets) } + { fix A assume "A \ sets T" then show "Y -` A \ space M \ sets M" + using `Y \ measurable M T` by (auto intro: measurable_sets) } + next + fix A B assume ab: "A \ sets S" "B \ sets T" + have "extreal (prob ((X -` A \ space M) \ (Y -` B \ space M))) = + extreal (joint_distribution X Y (A \ B))" + unfolding distribution_def + by (intro arg_cong[where f="\C. extreal (prob C)"]) auto + also have "\ = XY.\ (A \ B)" + using ab eq by (auto simp: XY.finite_measure_eq) + also have "\ = extreal (distribution X A) * extreal (distribution Y B)" + using ab by (simp add: XY.pair_measure_times) + finally show "prob ((X -` A \ space M) \ (Y -` B \ space M)) = + prob (X -` A \ space M) * prob (Y -` B \ space M)" + unfolding distribution_def by simp + qed + qed fact+ +qed + +lemma (in information_space) mutual_information_commute_generic: + assumes X: "random_variable S X" and Y: "random_variable T Y" + assumes ac: "measure_space.absolutely_continuous + (S\measure := extreal\distribution X\ \\<^isub>M T\measure := extreal\distribution Y\) (extreal\joint_distribution X Y)" + shows "mutual_information b S T X Y = mutual_information b T S Y X" +proof - + let ?S = "S\measure := extreal\distribution X\" and ?T = "T\measure := extreal\distribution Y\" + interpret S: prob_space ?S using X by (rule distribution_prob_space) + interpret T: prob_space ?T using Y by (rule distribution_prob_space) + interpret P: pair_prob_space ?S ?T .. + interpret Q: pair_prob_space ?T ?S .. + show ?thesis + unfolding mutual_information_def + proof (intro Q.KL_divergence_vimage[OF Q.measure_preserving_swap _ _ _ _ ac b_gt_1]) + show "(\(x,y). (y,x)) \ measure_preserving + (P.P \ measure := extreal\joint_distribution X Y\) (Q.P \ measure := extreal\joint_distribution Y X\)" + using X Y unfolding measurable_def + unfolding measure_preserving_def using P.pair_sigma_algebra_swap_measurable + by (auto simp add: space_pair_measure distribution_def intro!: arg_cong[where f=\']) + have "prob_space (P.P\ measure := extreal\joint_distribution X Y\)" + using X Y by (auto intro!: distribution_prob_space random_variable_pairI) + then show "measure_space (P.P\ measure := extreal\joint_distribution X Y\)" + unfolding prob_space_def by simp + qed auto +qed + definition (in prob_space) "entropy b s X = mutual_information b s s X X" @@ -356,32 +696,6 @@ unfolding mutual_information_def . qed -lemma (in information_space) mutual_information_commute_generic: - assumes X: "random_variable S X" and Y: "random_variable T Y" - assumes ac: "measure_space.absolutely_continuous - (S\measure := extreal\distribution X\ \\<^isub>M T\measure := extreal\distribution Y\) (extreal\joint_distribution X Y)" - shows "mutual_information b S T X Y = mutual_information b T S Y X" -proof - - let ?S = "S\measure := extreal\distribution X\" and ?T = "T\measure := extreal\distribution Y\" - interpret S: prob_space ?S using X by (rule distribution_prob_space) - interpret T: prob_space ?T using Y by (rule distribution_prob_space) - interpret P: pair_prob_space ?S ?T .. - interpret Q: pair_prob_space ?T ?S .. - show ?thesis - unfolding mutual_information_def - proof (intro Q.KL_divergence_vimage[OF Q.measure_preserving_swap _ _ _ _ ac b_gt_1]) - show "(\(x,y). (y,x)) \ measure_preserving - (P.P \ measure := extreal\joint_distribution X Y\) (Q.P \ measure := extreal\joint_distribution Y X\)" - using X Y unfolding measurable_def - unfolding measure_preserving_def using P.pair_sigma_algebra_swap_measurable - by (auto simp add: space_pair_measure distribution_def intro!: arg_cong[where f=\']) - have "prob_space (P.P\ measure := extreal\joint_distribution X Y\)" - using X Y by (auto intro!: distribution_prob_space random_variable_pairI) - then show "measure_space (P.P\ measure := extreal\joint_distribution X Y\)" - unfolding prob_space_def by simp - qed auto -qed - lemma (in information_space) mutual_information_commute: assumes X: "finite_random_variable S X" and Y: "finite_random_variable T Y" shows "mutual_information b S T X Y = mutual_information b T S Y X" diff -r 9ba256ad6781 -r 60e181c4eae4 src/HOL/Probability/Probability_Measure.thy --- a/src/HOL/Probability/Probability_Measure.thy Thu Jun 09 13:55:11 2011 +0200 +++ b/src/HOL/Probability/Probability_Measure.thy Thu Jun 09 14:04:34 2011 +0200 @@ -275,7 +275,7 @@ proof - have "expectation X < expectation (\x. b)" using gt measure_space_1 - by (intro integral_less_AE) auto + by (intro integral_less_AE_space) auto then show ?thesis using prob_space by simp qed @@ -286,7 +286,7 @@ proof - have "expectation (\x. a) < expectation X" using gt measure_space_1 - by (intro integral_less_AE) auto + by (intro integral_less_AE_space) auto then show ?thesis using prob_space by simp qed diff -r 9ba256ad6781 -r 60e181c4eae4 src/HOL/Probability/Radon_Nikodym.thy --- a/src/HOL/Probability/Radon_Nikodym.thy Thu Jun 09 13:55:11 2011 +0200 +++ b/src/HOL/Probability/Radon_Nikodym.thy Thu Jun 09 14:04:34 2011 +0200 @@ -1311,6 +1311,59 @@ by (auto simp: RN_deriv_positive_integral[OF ms \(2)]) qed +lemma (in sigma_finite_measure) real_RN_deriv: + assumes "finite_measure (M\measure := \\)" (is "finite_measure ?\") + assumes ac: "absolutely_continuous \" + obtains D where "D \ borel_measurable M" + and "AE x. RN_deriv M \ x = extreal (D x)" + and "AE x in M\measure := \\. 0 < D x" + and "\x. 0 \ D x" +proof + interpret \: finite_measure ?\ by fact + have ms: "measure_space ?\" by default + note RN = RN_deriv[OF ms ac] + + let ?RN = "\t. {x \ space M. RN_deriv M \ x = t}" + + show "(\x. real (RN_deriv M \ x)) \ borel_measurable M" + using RN by auto + + have "\ (?RN \) = (\\<^isup>+ x. RN_deriv M \ x * indicator (?RN \) x \M)" + using RN by auto + also have "\ = (\\<^isup>+ x. \ * indicator (?RN \) x \M)" + by (intro positive_integral_cong) (auto simp: indicator_def) + also have "\ = \ * \ (?RN \)" + using RN by (intro positive_integral_cmult_indicator) auto + finally have eq: "\ (?RN \) = \ * \ (?RN \)" . + moreover + have "\ (?RN \) = 0" + proof (rule ccontr) + assume "\ {x \ space M. RN_deriv M \ x = \} \ 0" + moreover from RN have "0 \ \ {x \ space M. RN_deriv M \ x = \}" by auto + ultimately have "0 < \ {x \ space M. RN_deriv M \ x = \}" by auto + with eq have "\ (?RN \) = \" by simp + with \.finite_measure[of "?RN \"] RN show False by auto + qed + ultimately have "AE x. RN_deriv M \ x < \" + using RN by (intro AE_iff_measurable[THEN iffD2]) auto + then show "AE x. RN_deriv M \ x = extreal (real (RN_deriv M \ x))" + using RN(3) by (auto simp: extreal_real) + then have eq: "AE x in (M\measure := \\) . RN_deriv M \ x = extreal (real (RN_deriv M \ x))" + using ac absolutely_continuous_AE[OF ms] by auto + + show "\x. 0 \ real (RN_deriv M \ x)" + using RN by (auto intro: real_of_extreal_pos) + + have "\ (?RN 0) = (\\<^isup>+ x. RN_deriv M \ x * indicator (?RN 0) x \M)" + using RN by auto + also have "\ = (\\<^isup>+ x. 0 \M)" + by (intro positive_integral_cong) (auto simp: indicator_def) + finally have "AE x in (M\measure := \\). RN_deriv M \ x \ 0" + using RN by (intro \.AE_iff_measurable[THEN iffD2]) auto + with RN(3) eq show "AE x in (M\measure := \\). 0 < real (RN_deriv M \ x)" + by (auto simp: zero_less_real_of_extreal le_less) +qed + lemma (in sigma_finite_measure) RN_deriv_singleton: assumes "measure_space (M\measure := \\)" and ac: "absolutely_continuous \"