src/HOL/Probability/Information.thy
changeset 49786 f33d5f009627
parent 49785 0a8adca22974
child 49787 d8de705b48d4
     1.1 --- a/src/HOL/Probability/Information.thy	Wed Oct 10 12:12:24 2012 +0200
     1.2 +++ b/src/HOL/Probability/Information.thy	Wed Oct 10 12:12:25 2012 +0200
     1.3 @@ -696,6 +696,98 @@
     1.4         (auto simp: borel_measurable_ereal_iff)
     1.5  qed 
     1.6  
     1.7 +lemma (in prob_space) distributed_imp_emeasure_nonzero:
     1.8 +  assumes X: "distributed M MX X Px"
     1.9 +  shows "emeasure MX {x \<in> space MX. Px x \<noteq> 0} \<noteq> 0"
    1.10 +proof
    1.11 +  note Px = distributed_borel_measurable[OF X] distributed_AE[OF X]
    1.12 +  interpret X: prob_space "distr M MX X"
    1.13 +    using distributed_measurable[OF X] by (rule prob_space_distr)
    1.14 +
    1.15 +  assume "emeasure MX {x \<in> space MX. Px x \<noteq> 0} = 0"
    1.16 +  with Px have "AE x in MX. Px x = 0"
    1.17 +    by (intro AE_I[OF subset_refl]) (auto simp: borel_measurable_ereal_iff)
    1.18 +  moreover
    1.19 +  from X.emeasure_space_1 have "(\<integral>\<^isup>+x. Px x \<partial>MX) = 1"
    1.20 +    unfolding distributed_distr_eq_density[OF X] using Px
    1.21 +    by (subst (asm) emeasure_density)
    1.22 +       (auto simp: borel_measurable_ereal_iff intro!: integral_cong cong: positive_integral_cong)
    1.23 +  ultimately show False
    1.24 +    by (simp add: positive_integral_cong_AE)
    1.25 +qed
    1.26 +
    1.27 +lemma (in information_space) entropy_le:
    1.28 +  fixes Px :: "'b \<Rightarrow> real" and MX :: "'b measure"
    1.29 +  assumes X: "distributed M MX X Px"
    1.30 +  and fin: "emeasure MX {x \<in> space MX. Px x \<noteq> 0} \<noteq> \<infinity>"
    1.31 +  and int: "integrable MX (\<lambda>x. - Px x * log b (Px x))"
    1.32 +  shows "entropy b MX X \<le> log b (measure MX {x \<in> space MX. Px x \<noteq> 0})"
    1.33 +proof -
    1.34 +  note Px = distributed_borel_measurable[OF X] distributed_AE[OF X]
    1.35 +  interpret X: prob_space "distr M MX X"
    1.36 +    using distributed_measurable[OF X] by (rule prob_space_distr)
    1.37 +
    1.38 +  have " - log b (measure MX {x \<in> space MX. Px x \<noteq> 0}) = 
    1.39 +    - log b (\<integral> x. indicator {x \<in> space MX. Px x \<noteq> 0} x \<partial>MX)"
    1.40 +    using Px fin
    1.41 +    by (subst integral_indicator) (auto simp: measure_def borel_measurable_ereal_iff)
    1.42 +  also have "- log b (\<integral> x. indicator {x \<in> space MX. Px x \<noteq> 0} x \<partial>MX) = - log b (\<integral> x. 1 / Px x \<partial>distr M MX X)"
    1.43 +    unfolding distributed_distr_eq_density[OF X] using Px
    1.44 +    apply (intro arg_cong[where f="log b"] arg_cong[where f=uminus])
    1.45 +    by (subst integral_density) (auto simp: borel_measurable_ereal_iff intro!: integral_cong)
    1.46 +  also have "\<dots> \<le> (\<integral> x. - log b (1 / Px x) \<partial>distr M MX X)"
    1.47 +  proof (rule X.jensens_inequality[of "\<lambda>x. 1 / Px x" "{0<..}" 0 1 "\<lambda>x. - log b x"])
    1.48 +    show "AE x in distr M MX X. 1 / Px x \<in> {0<..}"
    1.49 +      unfolding distributed_distr_eq_density[OF X]
    1.50 +      using Px by (auto simp: AE_density)
    1.51 +    have [simp]: "\<And>x. x \<in> space MX \<Longrightarrow> ereal (if Px x = 0 then 0 else 1) = indicator {x \<in> space MX. Px x \<noteq> 0} x"
    1.52 +      by (auto simp: one_ereal_def)
    1.53 +    have "(\<integral>\<^isup>+ x. max 0 (ereal (- (if Px x = 0 then 0 else 1))) \<partial>MX) = (\<integral>\<^isup>+ x. 0 \<partial>MX)"
    1.54 +      by (intro positive_integral_cong) (auto split: split_max)
    1.55 +    then show "integrable (distr M MX X) (\<lambda>x. 1 / Px x)"
    1.56 +      unfolding distributed_distr_eq_density[OF X] using Px
    1.57 +      by (auto simp: positive_integral_density integrable_def borel_measurable_ereal_iff fin positive_integral_max_0
    1.58 +              cong: positive_integral_cong)
    1.59 +    have "integrable MX (\<lambda>x. Px x * log b (1 / Px x)) =
    1.60 +      integrable MX (\<lambda>x. - Px x * log b (Px x))"
    1.61 +      using Px
    1.62 +      by (intro integrable_cong_AE)
    1.63 +         (auto simp: borel_measurable_ereal_iff log_divide_eq
    1.64 +                  intro!: measurable_If)
    1.65 +    then show "integrable (distr M MX X) (\<lambda>x. - log b (1 / Px x))"
    1.66 +      unfolding distributed_distr_eq_density[OF X]
    1.67 +      using Px int
    1.68 +      by (subst integral_density) (auto simp: borel_measurable_ereal_iff)
    1.69 +  qed (auto simp: minus_log_convex[OF b_gt_1])
    1.70 +  also have "\<dots> = (\<integral> x. log b (Px x) \<partial>distr M MX X)"
    1.71 +    unfolding distributed_distr_eq_density[OF X] using Px
    1.72 +    by (intro integral_cong_AE) (auto simp: AE_density log_divide_eq)
    1.73 +  also have "\<dots> = - entropy b MX X"
    1.74 +    unfolding distributed_distr_eq_density[OF X] using Px
    1.75 +    by (subst entropy_distr[OF X]) (auto simp: borel_measurable_ereal_iff integral_density)
    1.76 +  finally show ?thesis
    1.77 +    by simp
    1.78 +qed
    1.79 +
    1.80 +lemma (in information_space) entropy_le_space:
    1.81 +  fixes Px :: "'b \<Rightarrow> real" and MX :: "'b measure"
    1.82 +  assumes X: "distributed M MX X Px"
    1.83 +  and fin: "finite_measure MX"
    1.84 +  and int: "integrable MX (\<lambda>x. - Px x * log b (Px x))"
    1.85 +  shows "entropy b MX X \<le> log b (measure MX (space MX))"
    1.86 +proof -
    1.87 +  note Px = distributed_borel_measurable[OF X] distributed_AE[OF X]
    1.88 +  interpret finite_measure MX by fact
    1.89 +  have "entropy b MX X \<le> log b (measure MX {x \<in> space MX. Px x \<noteq> 0})"
    1.90 +    using int X by (intro entropy_le) auto
    1.91 +  also have "\<dots> \<le> log b (measure MX (space MX))"
    1.92 +    using Px distributed_imp_emeasure_nonzero[OF X]
    1.93 +    by (intro log_le)
    1.94 +       (auto intro!: borel_measurable_ereal_iff finite_measure_mono b_gt_1
    1.95 +                     less_le[THEN iffD2] measure_nonneg simp: emeasure_eq_measure)
    1.96 +  finally show ?thesis .
    1.97 +qed
    1.98 +
    1.99  lemma (in prob_space) uniform_distributed_params:
   1.100    assumes X: "distributed M MX X (\<lambda>x. indicator A x / measure MX A)"
   1.101    shows "A \<in> sets MX" "measure MX A \<noteq> 0"
   1.102 @@ -731,13 +823,9 @@
   1.103  qed
   1.104  
   1.105  lemma (in information_space) entropy_simple_distributed:
   1.106 -  fixes X :: "'a \<Rightarrow> 'b"
   1.107 -  assumes X: "simple_distributed M X f"
   1.108 -  shows "\<H>(X) = - (\<Sum>x\<in>X`space M. f x * log b (f x))"
   1.109 -proof (subst entropy_distr[OF simple_distributed[OF X]])
   1.110 -  show "- (\<integral>x. f x * log b (f x) \<partial>(count_space (X`space M))) = - (\<Sum>x\<in>X ` space M. f x * log b (f x))"
   1.111 -    using X by (auto simp add: lebesgue_integral_count_space_finite)
   1.112 -qed
   1.113 +  "simple_distributed M X f \<Longrightarrow> \<H>(X) = - (\<Sum>x\<in>X`space M. f x * log b (f x))"
   1.114 +  by (subst entropy_distr[OF simple_distributed])
   1.115 +     (auto simp add: lebesgue_integral_count_space_finite)
   1.116  
   1.117  lemma (in information_space) entropy_le_card_not_0:
   1.118    assumes X: "simple_distributed M X f"
   1.119 @@ -1058,7 +1146,7 @@
   1.120      by (subst distr_distr[OF measurable_snd]) (auto dest: distributed_measurable simp: comp_def)
   1.121  
   1.122    have "entropy b T Y = - (\<integral>y. Py y * log b (Py y) \<partial>T)"
   1.123 -    by (rule entropy_distr[OF T Py])
   1.124 +    by (rule entropy_distr[OF Py])
   1.125    also have "\<dots> = - (\<integral>(x,y). Pxy (x,y) * log b (Py y) \<partial>(S \<Otimes>\<^isub>M T))"
   1.126      using b_gt_1 Py[THEN distributed_real_measurable]
   1.127      by (subst distributed_transform_integral[OF Pxy Py, where T=snd]) (auto intro!: integral_cong)
   1.128 @@ -1083,7 +1171,7 @@
   1.129    with I1 I2 show ?thesis
   1.130      unfolding conditional_entropy_def
   1.131      apply (subst e_eq)
   1.132 -    apply (subst entropy_distr[OF ST Pxy])
   1.133 +    apply (subst entropy_distr[OF Pxy])
   1.134      unfolding minus_diff_minus
   1.135      apply (subst integral_diff(2)[symmetric])
   1.136      apply (auto intro!: integral_cong_AE simp: split_beta' simp del: integral_diff)
   1.137 @@ -1172,14 +1260,14 @@
   1.138  proof -
   1.139    have X: "entropy b S X = - (\<integral>x. Pxy x * log b (Px (fst x)) \<partial>(S \<Otimes>\<^isub>M T))"
   1.140      using b_gt_1 Px[THEN distributed_real_measurable]
   1.141 -    apply (subst entropy_distr[OF S Px])
   1.142 +    apply (subst entropy_distr[OF Px])
   1.143      apply (subst distributed_transform_integral[OF Pxy Px, where T=fst])
   1.144      apply (auto intro!: integral_cong)
   1.145      done
   1.146  
   1.147    have Y: "entropy b T Y = - (\<integral>x. Pxy x * log b (Py (snd x)) \<partial>(S \<Otimes>\<^isub>M T))"
   1.148      using b_gt_1 Py[THEN distributed_real_measurable]
   1.149 -    apply (subst entropy_distr[OF T Py])
   1.150 +    apply (subst entropy_distr[OF Py])
   1.151      apply (subst distributed_transform_integral[OF Pxy Py, where T=snd])
   1.152      apply (auto intro!: integral_cong)
   1.153      done
   1.154 @@ -1190,7 +1278,7 @@
   1.155    have ST: "sigma_finite_measure (S \<Otimes>\<^isub>M T)" ..
   1.156  
   1.157    have XY: "entropy b (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x)) = - (\<integral>x. Pxy x * log b (Pxy x) \<partial>(S \<Otimes>\<^isub>M T))"
   1.158 -    by (subst entropy_distr[OF ST Pxy]) (auto intro!: integral_cong)
   1.159 +    by (subst entropy_distr[OF Pxy]) (auto intro!: integral_cong)
   1.160    
   1.161    have "AE x in S \<Otimes>\<^isub>M T. Px (fst x) = 0 \<longrightarrow> Pxy x = 0"
   1.162      by (intro subdensity_real[of fst, OF _ Pxy Px]) (auto intro: measurable_Pair)
   1.163 @@ -1303,7 +1391,7 @@
   1.164    also have "\<dots> = - (\<Sum>x\<in>(\<lambda>x. (Y x, X x)) ` space M. ?h x * log b (?h x))"
   1.165      by (auto intro!: setsum_cong)
   1.166    also have "\<dots> = entropy b (count_space (Y ` space M) \<Otimes>\<^isub>M count_space (X ` space M)) (\<lambda>x. (Y x, X x))"
   1.167 -    by (subst entropy_distr[OF _ simple_distributed_joint[OF YX]])
   1.168 +    by (subst entropy_distr[OF simple_distributed_joint[OF YX]])
   1.169         (auto simp: pair_measure_count_space sigma_finite_measure_count_space_finite lebesgue_integral_count_space_finite
   1.170               cong del: setsum_cong  intro!: setsum_mono_zero_left)
   1.171    finally have "\<H>(\<lambda>x. (X x, Y x)) = entropy b (count_space (Y ` space M) \<Otimes>\<^isub>M count_space (X ` space M)) (\<lambda>x. (Y x, X x))" .