diff -r f9ff311992b6 -r 3e39b0e730d6 src/HOL/Probability/Information.thy --- a/src/HOL/Probability/Information.thy Wed Feb 02 10:35:41 2011 +0100 +++ b/src/HOL/Probability/Information.thy Wed Feb 02 12:34:45 2011 +0100 @@ -165,43 +165,45 @@ Kullback$-$Leibler distance. *} definition - "KL_divergence b M \ \ = - measure_space.integral M \ (\x. log b (real (sigma_finite_measure.RN_deriv M \ \ x)))" + "KL_divergence b M \ = \x. log b (real (RN_deriv M \ x)) \M\measure := \\" lemma (in sigma_finite_measure) KL_divergence_cong: - assumes "measure_space M \" - and cong: "\A. A \ sets M \ \' A = \ A" "\A. A \ sets M \ \' A = \ A" - shows "KL_divergence b M \' \' = KL_divergence b M \ \" + assumes "measure_space (M\measure := \\)" (is "measure_space ?\") + assumes [simp]: "sets N = sets M" "space N = space M" + "\A. A \ sets M \ measure N A = \ A" + "\A. A \ sets M \ \ A = \' A" + shows "KL_divergence b M \ = KL_divergence b N \'" proof - - interpret \: measure_space M \ by fact - show ?thesis - unfolding KL_divergence_def - using RN_deriv_cong[OF cong, of "\A. A"] - by (simp add: cong \.integral_cong_measure[OF cong(2)]) + interpret \: measure_space ?\ by fact + have "KL_divergence b M \ = \x. log b (real (RN_deriv N \' x)) \?\" + by (simp cong: RN_deriv_cong \.integral_cong add: KL_divergence_def) + also have "\ = KL_divergence b N \'" + by (auto intro!: \.integral_cong_measure[symmetric] simp: KL_divergence_def) + finally show ?thesis . qed lemma (in finite_measure_space) KL_divergence_eq_finite: - assumes v: "finite_measure_space M \" + assumes v: "finite_measure_space (M\measure := \\)" assumes ac: "absolutely_continuous \" - shows "KL_divergence b M \ \ = (\x\space M. real (\ {x}) * log b (real (\ {x}) / real (\ {x})))" (is "_ = ?sum") + shows "KL_divergence b M \ = (\x\space M. real (\ {x}) * log b (real (\ {x}) / real (\ {x})))" (is "_ = ?sum") proof (simp add: KL_divergence_def finite_measure_space.integral_finite_singleton[OF v]) - interpret v: finite_measure_space M \ by fact - have ms: "measure_space M \" by fact - show "(\x \ space M. log b (real (RN_deriv \ x)) * real (\ {x})) = ?sum" + interpret v: finite_measure_space "M\measure := \\" by fact + have ms: "measure_space (M\measure := \\)" by default + show "(\x \ space M. log b (real (RN_deriv M \ x)) * real (\ {x})) = ?sum" using RN_deriv_finite_measure[OF ms ac] by (auto intro!: setsum_cong simp: field_simps real_of_pextreal_mult[symmetric]) qed lemma (in finite_prob_space) KL_divergence_positive_finite: - assumes v: "finite_prob_space M \" + assumes v: "finite_prob_space (M\measure := \\)" assumes ac: "absolutely_continuous \" and "1 < b" - shows "0 \ KL_divergence b M \ \" + shows "0 \ KL_divergence b M \" proof - - interpret v: finite_prob_space M \ using v . - have ms: "finite_measure_space M \" by default + interpret v: finite_prob_space "M\measure := \\" by fact + have ms: "finite_measure_space (M\measure := \\)" by default - have "- (KL_divergence b M \ \) \ log b (\x\space M. real (\ {x}))" + have "- (KL_divergence b M \) \ log b (\x\space M. real (\ {x}))" proof (subst KL_divergence_eq_finite[OF ms ac], safe intro!: log_setsum_divide not_empty) show "finite (space M)" using finite_space by simp show "1 < b" by fact @@ -215,16 +217,15 @@ using ac[unfolded absolutely_continuous_def, THEN bspec, of "{x}"] x by auto thus "0 < prob {x}" using finite_measure[of "{x}"] x by auto } qed auto - thus "0 \ KL_divergence b M \ \" using finite_sum_over_space_eq_1 by simp + thus "0 \ KL_divergence b M \" using finite_sum_over_space_eq_1 by simp qed subsection {* Mutual Information *} definition (in prob_space) "mutual_information b S T X Y = - KL_divergence b (sigma (pair_algebra S T)) - (joint_distribution X Y) - (pair_sigma_finite.pair_measure S (distribution X) T (distribution Y))" + KL_divergence b (S\measure := distribution X\ \\<^isub>M T\measure := distribution Y\) + (joint_distribution X Y)" definition (in prob_space) "entropy b s X = mutual_information b s s X X" @@ -232,32 +233,49 @@ abbreviation (in information_space) mutual_information_Pow ("\'(_ ; _')") where "\(X ; Y) \ mutual_information b - \ space = X`space M, sets = Pow (X`space M) \ - \ space = Y`space M, sets = Pow (Y`space M) \ X Y" + \ space = X`space M, sets = Pow (X`space M), measure = distribution X \ + \ space = Y`space M, sets = Pow (Y`space M), measure = distribution Y \ X Y" + +lemma algebra_measure_update[simp]: + "algebra (M'\measure := m\) \ algebra M'" + unfolding algebra_def by simp + +lemma sigma_algebra_measure_update[simp]: + "sigma_algebra (M'\measure := m\) \ sigma_algebra M'" + unfolding sigma_algebra_def sigma_algebra_axioms_def by simp + +lemma finite_sigma_algebra_measure_update[simp]: + "finite_sigma_algebra (M'\measure := m\) \ finite_sigma_algebra M'" + unfolding finite_sigma_algebra_def finite_sigma_algebra_axioms_def by simp lemma (in prob_space) finite_variables_absolutely_continuous: assumes X: "finite_random_variable S X" and Y: "finite_random_variable T Y" - shows "measure_space.absolutely_continuous (sigma (pair_algebra S T)) - (pair_sigma_finite.pair_measure S (distribution X) T (distribution Y)) (joint_distribution X Y)" + shows "measure_space.absolutely_continuous + (S\measure := distribution X\ \\<^isub>M T\measure := distribution Y\) + (joint_distribution X Y)" proof - - interpret X: finite_prob_space S "distribution X" using X by (rule distribution_finite_prob_space) - interpret Y: finite_prob_space T "distribution Y" using Y by (rule distribution_finite_prob_space) - interpret XY: pair_finite_prob_space S "distribution X" T "distribution Y" by default - interpret P: finite_prob_space XY.P "joint_distribution X Y" - using assms by (intro joint_distribution_finite_prob_space) + interpret X: finite_prob_space "S\measure := distribution X\" + using X by (rule distribution_finite_prob_space) + interpret Y: finite_prob_space "T\measure := distribution Y\" + using Y by (rule distribution_finite_prob_space) + interpret XY: pair_finite_prob_space + "S\measure := distribution X\" "T\ measure := distribution Y\" by default + interpret P: finite_prob_space "XY.P\ measure := joint_distribution X Y\" + using assms by (auto intro!: joint_distribution_finite_prob_space) + note rv = assms[THEN finite_random_variableD] show "XY.absolutely_continuous (joint_distribution X Y)" proof (rule XY.absolutely_continuousI) - show "finite_measure_space XY.P (joint_distribution X Y)" by default - fix x assume "x \ space XY.P" and "XY.pair_measure {x} = 0" + show "finite_measure_space (XY.P\ measure := joint_distribution X Y\)" by default + fix x assume "x \ space XY.P" and "XY.\ {x} = 0" then obtain a b where "(a, b) = x" and "a \ space S" "b \ space T" and distr: "distribution X {a} * distribution Y {b} = 0" - by (cases x) (auto simp: pair_algebra_def) - with assms[THEN finite_random_variableD] - joint_distribution_Times_le_fst[of S X T Y "{a}" "{b}"] - joint_distribution_Times_le_snd[of S X T Y "{a}" "{b}"] + by (cases x) (auto simp: space_pair_measure) + with X.sets_eq_Pow Y.sets_eq_Pow + joint_distribution_Times_le_fst[OF rv, of "{a}" "{b}"] + joint_distribution_Times_le_snd[OF rv, of "{a}" "{b}"] have "joint_distribution X Y {x} \ distribution Y {b}" "joint_distribution X Y {x} \ distribution X {a}" - by auto + by (auto simp del: X.sets_eq_Pow Y.sets_eq_Pow) with distr show "joint_distribution X Y {x} = 0" by auto qed qed @@ -274,19 +292,21 @@ and mutual_information_positive_generic: "0 \ mutual_information b MX MY X Y" (is ?positive) proof - - interpret X: finite_prob_space MX "distribution X" using MX by (rule distribution_finite_prob_space) - interpret Y: finite_prob_space MY "distribution Y" using MY by (rule distribution_finite_prob_space) - interpret XY: pair_finite_prob_space MX "distribution X" MY "distribution Y" by default - interpret P: finite_prob_space XY.P "joint_distribution X Y" - using assms by (intro joint_distribution_finite_prob_space) + interpret X: finite_prob_space "MX\measure := distribution X\" + using MX by (rule distribution_finite_prob_space) + interpret Y: finite_prob_space "MY\measure := distribution Y\" + using MY by (rule distribution_finite_prob_space) + interpret XY: pair_finite_prob_space "MX\measure := distribution X\" "MY\measure := distribution Y\" by default + interpret P: finite_prob_space "XY.P\measure := joint_distribution X Y\" + using assms by (auto intro!: joint_distribution_finite_prob_space) - have P_ms: "finite_measure_space XY.P (joint_distribution X Y)" by default - have P_ps: "finite_prob_space XY.P (joint_distribution X Y)" by default + have P_ms: "finite_measure_space (XY.P\measure :=joint_distribution X Y\)" by default + have P_ps: "finite_prob_space (XY.P\measure := joint_distribution X Y\)" by default show ?sum unfolding Let_def mutual_information_def by (subst XY.KL_divergence_eq_finite[OF P_ms finite_variables_absolutely_continuous[OF MX MY]]) - (auto simp add: pair_algebra_def setsum_cartesian_product' real_of_pextreal_mult[symmetric]) + (auto simp add: space_pair_measure setsum_cartesian_product' real_of_pextreal_mult[symmetric]) show ?positive using XY.KL_divergence_positive_finite[OF P_ps finite_variables_absolutely_continuous[OF MX MY] b_gt_1] @@ -301,12 +321,12 @@ by (auto simp add: ac_simps intro!: setsum_reindex_cong[OF swap_inj_on]) lemma (in information_space) mutual_information_commute_simple: - assumes X: "simple_function X" and Y: "simple_function Y" + assumes X: "simple_function M X" and Y: "simple_function M Y" shows "\(X;Y) = \(Y;X)" by (intro X Y simple_function_imp_finite_random_variable mutual_information_commute) lemma (in information_space) mutual_information_eq: - assumes "simple_function X" "simple_function Y" + assumes "simple_function M X" "simple_function M Y" shows "\(X;Y) = (\ (x,y) \ X ` space M \ Y ` space M. real (distribution (\x. (X x, Y x)) {(x,y)}) * log b (real (distribution (\x. (X x, Y x)) {(x,y)}) / (real (distribution X {x}) * real (distribution Y {y}))))" @@ -327,7 +347,7 @@ by (simp cong: distribution_cong image_cong) lemma (in information_space) mutual_information_positive: - assumes "simple_function X" "simple_function Y" + assumes "simple_function M X" "simple_function M Y" shows "0 \ \(X;Y)" using assms by (simp add: mutual_information_positive_generic) @@ -335,13 +355,14 @@ abbreviation (in information_space) entropy_Pow ("\'(_')") where - "\(X) \ entropy b \ space = X`space M, sets = Pow (X`space M) \ X" + "\(X) \ entropy b \ space = X`space M, sets = Pow (X`space M), measure = distribution X \ X" lemma (in information_space) entropy_generic_eq: assumes MX: "finite_random_variable MX X" shows "entropy b MX X = -(\ x \ space MX. real (distribution X {x}) * log b (real (distribution X {x})))" proof - - interpret MX: finite_prob_space MX "distribution X" using MX by (rule distribution_finite_prob_space) + interpret MX: finite_prob_space "MX\measure := distribution X\" + using MX by (rule distribution_finite_prob_space) let "?X x" = "real (distribution X {x})" let "?XX x y" = "real (joint_distribution X X {(x, y)})" { fix x y @@ -353,25 +374,26 @@ show ?thesis unfolding entropy_def mutual_information_generic_eq[OF MX MX] unfolding setsum_cartesian_product[symmetric] setsum_negf[symmetric] remove_XX - by (auto simp: setsum_cases MX.finite_space) + using MX.finite_space by (auto simp: setsum_cases) qed lemma (in information_space) entropy_eq: - assumes "simple_function X" + assumes "simple_function M X" shows "\(X) = -(\ x \ X ` space M. real (distribution X {x}) * log b (real (distribution X {x})))" using assms by (simp add: entropy_generic_eq) lemma (in information_space) entropy_positive: - "simple_function X \ 0 \ \(X)" + "simple_function M X \ 0 \ \(X)" unfolding entropy_def by (simp add: mutual_information_positive) lemma (in information_space) entropy_certainty_eq_0: - assumes "simple_function X" and "x \ X ` space M" and "distribution X {x} = 1" + assumes "simple_function M X" and "x \ X ` space M" and "distribution X {x} = 1" shows "\(X) = 0" proof - - interpret X: finite_prob_space "\ space = X ` space M, sets = Pow (X ` space M) \" "distribution X" - using simple_function_imp_finite_random_variable[OF `simple_function X`] - by (rule distribution_finite_prob_space) + let ?X = "\ space = X ` space M, sets = Pow (X ` space M), measure = distribution X\" + note simple_function_imp_finite_random_variable[OF `simple_function M X`] + from distribution_finite_prob_space[OF this, of "\ measure = distribution X \"] + interpret X: finite_prob_space ?X by simp have "distribution X (X ` space M - {x}) = distribution X (X ` space M) - distribution X {x}" using X.measure_compl[of "{x}"] assms by auto also have "\ = 0" using X.prob_space assms by auto @@ -383,38 +405,39 @@ hence fi: "\ y. y \ X ` space M \ real (distribution X {y}) = (if x = y then 1 else 0)" using assms by auto have y: "\y. (if x = y then 1 else 0) * log b (if x = y then 1 else 0) = 0" by simp - show ?thesis unfolding entropy_eq[OF `simple_function X`] by (auto simp: y fi) + show ?thesis unfolding entropy_eq[OF `simple_function M X`] by (auto simp: y fi) qed lemma (in information_space) entropy_le_card_not_0: - assumes "simple_function X" + assumes "simple_function M X" shows "\(X) \ log b (real (card (X ` space M \ {x . distribution X {x} \ 0})))" proof - let "?d x" = "distribution X {x}" let "?p x" = "real (?d x)" have "\(X) = (\x\X`space M. ?p x * log b (1 / ?p x))" - by (auto intro!: setsum_cong simp: entropy_eq[OF `simple_function X`] setsum_negf[symmetric] log_simps not_less) + by (auto intro!: setsum_cong simp: entropy_eq[OF `simple_function M X`] setsum_negf[symmetric] log_simps not_less) also have "\ \ log b (\x\X`space M. ?p x * (1 / ?p x))" apply (rule log_setsum') - using not_empty b_gt_1 `simple_function X` sum_over_space_real_distribution + using not_empty b_gt_1 `simple_function M X` sum_over_space_real_distribution by (auto simp: simple_function_def) also have "\ = log b (\x\X`space M. if ?d x \ 0 then 1 else 0)" - using distribution_finite[OF `simple_function X`[THEN simple_function_imp_random_variable], simplified] + using distribution_finite[OF `simple_function M X`[THEN simple_function_imp_random_variable], simplified] by (intro arg_cong[where f="\X. log b X"] setsum_cong) (auto simp: real_of_pextreal_eq_0) finally show ?thesis - using `simple_function X` by (auto simp: setsum_cases real_eq_of_nat simple_function_def) + using `simple_function M X` by (auto simp: setsum_cases real_eq_of_nat simple_function_def) qed lemma (in information_space) entropy_uniform_max: - assumes "simple_function X" + assumes "simple_function M X" assumes "\x y. \ x \ X ` space M ; y \ X ` space M \ \ distribution X {x} = distribution X {y}" shows "\(X) = log b (real (card (X ` space M)))" proof - - interpret X: finite_prob_space "\ space = X ` space M, sets = Pow (X ` space M) \" "distribution X" - using simple_function_imp_finite_random_variable[OF `simple_function X`] - by (rule distribution_finite_prob_space) + let ?X = "\ space = X ` space M, sets = Pow (X ` space M), measure = distribution X\" + note simple_function_imp_finite_random_variable[OF `simple_function M X`] + from distribution_finite_prob_space[OF this, of "\ measure = distribution X \"] + interpret X: finite_prob_space ?X by simp have card_gt0: "0 < card (X ` space M)" unfolding card_gt_0_iff - using `simple_function X` not_empty by (auto simp: simple_function_def) + using `simple_function M X` not_empty by (auto simp: simple_function_def) { fix x assume "x \ X ` space M" hence "real (distribution X {x}) = 1 / real (card (X ` space M))" proof (rule X.uniform_prob[simplified]) @@ -423,18 +446,18 @@ qed } thus ?thesis using not_empty X.finite_space b_gt_1 card_gt0 - by (simp add: entropy_eq[OF `simple_function X`] real_eq_of_nat[symmetric] log_simps) + by (simp add: entropy_eq[OF `simple_function M X`] real_eq_of_nat[symmetric] log_simps) qed lemma (in information_space) entropy_le_card: - assumes "simple_function X" + assumes "simple_function M X" shows "\(X) \ log b (real (card (X ` space M)))" proof cases assume "X ` space M \ {x. distribution X {x} \ 0} = {}" then have "\x. x\X`space M \ distribution X {x} = 0" by auto moreover have "0 < card (X`space M)" - using `simple_function X` not_empty + using `simple_function M X` not_empty by (auto simp: card_gt_0_iff simple_function_def) then have "log b 1 \ log b (real (card (X`space M)))" using b_gt_1 by (intro log_le) auto @@ -451,10 +474,10 @@ qed lemma (in information_space) entropy_commute: - assumes "simple_function X" "simple_function Y" + assumes "simple_function M X" "simple_function M Y" shows "\(\x. (X x, Y x)) = \(\x. (Y x, X x))" proof - - have sf: "simple_function (\x. (X x, Y x))" "simple_function (\x. (Y x, X x))" + have sf: "simple_function M (\x. (X x, Y x))" "simple_function M (\x. (Y x, X x))" using assms by (auto intro: simple_function_Pair) have *: "(\x. (Y x, X x))`space M = (\(a,b). (b,a))`(\x. (X x, Y x))`space M" by auto @@ -466,12 +489,12 @@ qed lemma (in information_space) entropy_eq_cartesian_product: - assumes "simple_function X" "simple_function Y" + assumes "simple_function M X" "simple_function M Y" shows "\(\x. (X x, Y x)) = -(\x\X`space M. \y\Y`space M. real (joint_distribution X Y {(x,y)}) * log b (real (joint_distribution X Y {(x,y)})))" proof - - have sf: "simple_function (\x. (X x, Y x))" + have sf: "simple_function M (\x. (X x, Y x))" using assms by (auto intro: simple_function_Pair) { fix x assume "x\(\x. (X x, Y x))`space M" then have "(\x. (X x, Y x)) -` {x} \ space M = {}" by auto @@ -485,19 +508,18 @@ subsection {* Conditional Mutual Information *} definition (in prob_space) - "conditional_mutual_information b M1 M2 M3 X Y Z \ - mutual_information b M1 (sigma (pair_algebra M2 M3)) X (\x. (Y x, Z x)) - - mutual_information b M1 M3 X Z" + "conditional_mutual_information b MX MY MZ X Y Z \ + mutual_information b MX (MY \\<^isub>M MZ) X (\x. (Y x, Z x)) - + mutual_information b MX MZ X Z" abbreviation (in information_space) conditional_mutual_information_Pow ("\'( _ ; _ | _ ')") where "\(X ; Y | Z) \ conditional_mutual_information b - \ space = X`space M, sets = Pow (X`space M) \ - \ space = Y`space M, sets = Pow (Y`space M) \ - \ space = Z`space M, sets = Pow (Z`space M) \ + \ space = X`space M, sets = Pow (X`space M), measure = distribution X \ + \ space = Y`space M, sets = Pow (Y`space M), measure = distribution Y \ + \ space = Z`space M, sets = Pow (Z`space M), measure = distribution Z \ X Y Z" - lemma (in information_space) conditional_mutual_information_generic_eq: assumes MX: "finite_random_variable MX X" and MY: "finite_random_variable MY Y" @@ -519,7 +541,7 @@ note finite_var = MX MY MZ note random_var = finite_var[THEN finite_random_variableD] - note space_simps = space_pair_algebra space_sigma algebra.simps + note space_simps = space_pair_measure space_sigma algebra.simps note YZ = finite_random_variable_pairI[OF finite_var(2,3)] note XZ = finite_random_variable_pairI[OF finite_var(1,3)] @@ -574,12 +596,12 @@ unfolding conditional_mutual_information_def unfolding mutual_information_generic_eq[OF finite_var(1,3)] unfolding mutual_information_generic_eq[OF finite_var(1) YZ] - by (simp add: space_sigma space_pair_algebra setsum_cartesian_product') + by (simp add: space_sigma space_pair_measure setsum_cartesian_product') finally show ?thesis by simp qed lemma (in information_space) conditional_mutual_information_eq: - assumes "simple_function X" "simple_function Y" "simple_function Z" + assumes "simple_function M X" "simple_function M Y" "simple_function M Z" shows "\(X;Y|Z) = (\(x, y, z) \ X`space M \ Y`space M \ Z`space M. real (distribution (\x. (X x, Y x, Z x)) {(x, y, z)}) * log b (real (distribution (\x. (X x, Y x, Z x)) {(x, y, z)}) / @@ -588,11 +610,11 @@ by simp lemma (in information_space) conditional_mutual_information_eq_mutual_information: - assumes X: "simple_function X" and Y: "simple_function Y" + assumes X: "simple_function M X" and Y: "simple_function M Y" shows "\(X ; Y) = \(X ; Y | (\x. ()))" proof - have [simp]: "(\x. ()) ` space M = {()}" using not_empty by auto - have C: "simple_function (\x. ())" by auto + have C: "simple_function M (\x. ())" by auto show ?thesis unfolding conditional_mutual_information_eq[OF X Y C] unfolding mutual_information_eq[OF X Y] @@ -608,12 +630,13 @@ lemma (in prob_space) setsum_distribution: assumes X: "finite_random_variable MX X" shows "(\a\space MX. distribution X {a}) = 1" using setsum_joint_distribution[OF assms, of "\ space = UNIV, sets = Pow UNIV \" "\x. ()" "{()}"] - using sigma_algebra_Pow[of "UNIV::unit set"] by simp + using sigma_algebra_Pow[of "UNIV::unit set" "()"] by simp lemma (in prob_space) setsum_real_distribution: + fixes MX :: "('c, 'd) measure_space_scheme" assumes X: "finite_random_variable MX X" shows "(\a\space MX. real (distribution X {a})) = 1" - using setsum_real_joint_distribution[OF assms, of "\ space = UNIV, sets = Pow UNIV \" "\x. ()" "{()}"] - using sigma_algebra_Pow[of "UNIV::unit set"] by simp + using setsum_real_joint_distribution[OF assms, of "\ space = UNIV, sets = Pow UNIV, measure = undefined \" "\x. ()" "{()}"] + using sigma_algebra_Pow[of "UNIV::unit set" "\ measure = undefined \"] by simp lemma (in information_space) conditional_mutual_information_generic_positive: assumes "finite_random_variable MX X" and "finite_random_variable MY Y" and "finite_random_variable MZ Z" @@ -633,7 +656,7 @@ have split_beta: "\f. split f = (\x. f (fst x) (snd x))" by (simp add: fun_eq_iff) - note space_simps = space_pair_algebra space_sigma algebra.simps + note space_simps = space_pair_measure space_sigma algebra.simps note finite_var = assms note YZ = finite_random_variable_pairI[OF finite_var(2,3)] @@ -672,7 +695,7 @@ unfolding setsum_cartesian_product' unfolding setsum_commute[of _ "space MY"] unfolding setsum_commute[of _ "space MZ"] - by (simp_all add: space_pair_algebra + by (simp_all add: space_pair_measure setsum_real_joint_distribution_singleton[OF `finite_random_variable MX X` YZ] setsum_real_joint_distribution_singleton[OF `finite_random_variable MY Y` finite_var(3)] setsum_real_distribution[OF `finite_random_variable MZ Z`]) @@ -704,10 +727,9 @@ qed lemma (in information_space) conditional_mutual_information_positive: - assumes "simple_function X" and "simple_function Y" and "simple_function Z" + assumes "simple_function M X" and "simple_function M Y" and "simple_function M Z" shows "0 \ \(X;Y|Z)" - using conditional_mutual_information_generic_positive[OF assms[THEN simple_function_imp_finite_random_variable]] - by simp + by (rule conditional_mutual_information_generic_positive[OF assms[THEN simple_function_imp_finite_random_variable]]) subsection {* Conditional Entropy *} @@ -717,16 +739,17 @@ abbreviation (in information_space) conditional_entropy_Pow ("\'(_ | _')") where "\(X | Y) \ conditional_entropy b - \ space = X`space M, sets = Pow (X`space M) \ - \ space = Y`space M, sets = Pow (Y`space M) \ X Y" + \ space = X`space M, sets = Pow (X`space M), measure = distribution X \ + \ space = Y`space M, sets = Pow (Y`space M), measure = distribution Y \ X Y" lemma (in information_space) conditional_entropy_positive: - "simple_function X \ simple_function Y \ 0 \ \(X | Y)" + "simple_function M X \ simple_function M Y \ 0 \ \(X | Y)" unfolding conditional_entropy_def by (auto intro!: conditional_mutual_information_positive) lemma (in measure_space) empty_measureI: "A = {} \ \ A = 0" by simp lemma (in information_space) conditional_entropy_generic_eq: + fixes MX :: "('c, 'd) measure_space_scheme" and MY :: "('e, 'f) measure_space_scheme" assumes MX: "finite_random_variable MX X" assumes MZ: "finite_random_variable MZ Z" shows "conditional_entropy b MX MZ X Z = @@ -743,7 +766,7 @@ { fix x z have "?XXZ x x z = ?XZ x z" unfolding distribution_def by (auto intro!: arg_cong[where f=\]) } note this[simp] - { fix x x' :: 'b and z assume "x' \ x" + { fix x x' :: 'c and z assume "x' \ x" then have "?XXZ x x' z = 0" by (auto simp: distribution_def intro!: arg_cong[where f=\] empty_measureI) } note this[simp] @@ -762,7 +785,6 @@ finally have "(\x'\space MX. real (?XXZ x x' z) * ?f x x' z) = - real (?XZ x z) * log b (real (?XZ x z) / real (?Z z))" . } note * = this - show ?thesis unfolding conditional_entropy_def unfolding conditional_mutual_information_generic_eq[OF MX MX MZ] @@ -772,7 +794,7 @@ qed lemma (in information_space) conditional_entropy_eq: - assumes "simple_function X" "simple_function Z" + assumes "simple_function M X" "simple_function M Z" shows "\(X | Z) = - (\(x, z)\X ` space M \ Z ` space M. real (joint_distribution X Z {(x, z)}) * @@ -781,7 +803,7 @@ by simp lemma (in information_space) conditional_entropy_eq_ce_with_hypothesis: - assumes X: "simple_function X" and Y: "simple_function Y" + assumes X: "simple_function M X" and Y: "simple_function M Y" shows "\(X | Y) = -(\y\Y`space M. real (distribution Y {y}) * (\x\X`space M. real (joint_distribution X Y {(x,y)}) / real (distribution Y {(y)}) * @@ -794,7 +816,7 @@ intro!: setsum_cong) lemma (in information_space) conditional_entropy_eq_cartesian_product: - assumes "simple_function X" "simple_function Y" + assumes "simple_function M X" "simple_function M Y" shows "\(X | Y) = -(\x\X`space M. \y\Y`space M. real (joint_distribution X Y {(x,y)}) * log b (real (joint_distribution X Y {(x,y)}) / real (distribution Y {y})))" @@ -804,7 +826,7 @@ subsection {* Equalities *} lemma (in information_space) mutual_information_eq_entropy_conditional_entropy: - assumes X: "simple_function X" and Z: "simple_function Z" + assumes X: "simple_function M X" and Z: "simple_function M Z" shows "\(X ; Z) = \(X) - \(X | Z)" proof - let "?XZ x z" = "real (joint_distribution X Z {(x, z)})" @@ -828,7 +850,7 @@ qed lemma (in information_space) conditional_entropy_less_eq_entropy: - assumes X: "simple_function X" and Z: "simple_function Z" + assumes X: "simple_function M X" and Z: "simple_function M Z" shows "\(X | Z) \ \(X)" proof - have "\(X ; Z) = \(X) - \(X | Z)" using mutual_information_eq_entropy_conditional_entropy[OF assms] . @@ -837,7 +859,7 @@ qed lemma (in information_space) entropy_chain_rule: - assumes X: "simple_function X" and Y: "simple_function Y" + assumes X: "simple_function M X" and Y: "simple_function M Y" shows "\(\x. (X x, Y x)) = \(X) + \(Y|X)" proof - let "?XY x y" = "real (joint_distribution X Y {(x, y)})" @@ -976,7 +998,7 @@ qed lemma (in information_space) entropy_partition: - assumes sf: "simple_function X" "simple_function P" + assumes sf: "simple_function M X" "simple_function M P" assumes svi: "subvimage (space M) X P" shows "\(X) = \(P) + \(X|P)" proof - @@ -1026,10 +1048,10 @@ qed corollary (in information_space) entropy_data_processing: - assumes X: "simple_function X" shows "\(f \ X) \ \(X)" + assumes X: "simple_function M X" shows "\(f \ X) \ \(X)" proof - note X - moreover have fX: "simple_function (f \ X)" using X by auto + moreover have fX: "simple_function M (f \ X)" using X by auto moreover have "subvimage (space M) X (f \ X)" by auto ultimately have "\(X) = \(f\X) + \(X|f\X)" by (rule entropy_partition) then show "\(f \ X) \ \(X)" @@ -1037,12 +1059,12 @@ qed corollary (in information_space) entropy_of_inj: - assumes X: "simple_function X" and inj: "inj_on f (X`space M)" + assumes X: "simple_function M X" and inj: "inj_on f (X`space M)" shows "\(f \ X) = \(X)" proof (rule antisym) show "\(f \ X) \ \(X)" using entropy_data_processing[OF X] . next - have sf: "simple_function (f \ X)" + have sf: "simple_function M (f \ X)" using X by auto have "\(X) = \(the_inv_into (X`space M) f \ (f \ X))" by (auto intro!: mutual_information_cong simp: entropy_def the_inv_into_f_f[OF inj])