src/HOL/Probability/Distribution_Functions.thy
changeset 62975 1d066f6ab25d
parent 62083 7582b39f51ed
child 63092 a949b2a5f51d
equal deleted inserted replaced
62974:f17602cbf76a 62975:1d066f6ab25d
     4 *)
     4 *)
     5 
     5 
     6 section \<open>Distribution Functions\<close>
     6 section \<open>Distribution Functions\<close>
     7 
     7 
     8 text \<open>
     8 text \<open>
     9 Shows that the cumulative distribution function (cdf) of a distribution (a measure on the reals) is 
     9 Shows that the cumulative distribution function (cdf) of a distribution (a measure on the reals) is
    10 nondecreasing and right continuous, which tends to 0 and 1 in either direction.
    10 nondecreasing and right continuous, which tends to 0 and 1 in either direction.
    11 
    11 
    12 Conversely, every such function is the cdf of a unique distribution. This direction defines the 
    12 Conversely, every such function is the cdf of a unique distribution. This direction defines the
    13 measure in the obvious way on half-open intervals, and then applies the Caratheodory extension 
    13 measure in the obvious way on half-open intervals, and then applies the Caratheodory extension
    14 theorem.
    14 theorem.
    15 \<close>
    15 \<close>
    16 
    16 
    17 (* TODO: the locales "finite_borel_measure" and "real_distribution" are defined here, but maybe they
    17 (* TODO: the locales "finite_borel_measure" and "real_distribution" are defined here, but maybe they
    18  should be somewhere else. *)
    18  should be somewhere else. *)
    41 begin
    41 begin
    42 
    42 
    43 lemma sets_M[intro]: "a \<in> sets borel \<Longrightarrow> a \<in> sets M"
    43 lemma sets_M[intro]: "a \<in> sets borel \<Longrightarrow> a \<in> sets M"
    44   using M_super_borel by auto
    44   using M_super_borel by auto
    45 
    45 
    46 lemma cdf_diff_eq: 
    46 lemma cdf_diff_eq:
    47   assumes "x < y"
    47   assumes "x < y"
    48   shows "cdf M y - cdf M x = measure M {x<..y}"
    48   shows "cdf M y - cdf M x = measure M {x<..y}"
    49 proof -
    49 proof -
    50   from assms have *: "{..x} \<union> {x<..y} = {..y}" by auto
    50   from assms have *: "{..x} \<union> {x<..y} = {..y}" by auto
    51   have "measure M {..y} = measure M {..x} + measure M {x<..y}"
    51   have "measure M {..y} = measure M {..x} + measure M {x<..y}"
    57 lemma cdf_nondecreasing: "x \<le> y \<Longrightarrow> cdf M x \<le> cdf M y"
    57 lemma cdf_nondecreasing: "x \<le> y \<Longrightarrow> cdf M x \<le> cdf M y"
    58   unfolding cdf_def by (auto intro!: finite_measure_mono)
    58   unfolding cdf_def by (auto intro!: finite_measure_mono)
    59 
    59 
    60 lemma borel_UNIV: "space M = UNIV"
    60 lemma borel_UNIV: "space M = UNIV"
    61  by (metis in_mono sets.sets_into_space space_in_borel top_le M_super_borel)
    61  by (metis in_mono sets.sets_into_space space_in_borel top_le M_super_borel)
    62  
    62 
    63 lemma cdf_nonneg: "cdf M x \<ge> 0"
    63 lemma cdf_nonneg: "cdf M x \<ge> 0"
    64   unfolding cdf_def by (rule measure_nonneg)
    64   unfolding cdf_def by (rule measure_nonneg)
    65 
    65 
    66 lemma cdf_bounded: "cdf M x \<le> measure M (space M)"
    66 lemma cdf_bounded: "cdf M x \<le> measure M (space M)"
    67   unfolding cdf_def using assms by (intro bounded_measure)
    67   unfolding cdf_def using assms by (intro bounded_measure)
    74   also have "(\<Union> i::nat. {..real i}) = space M"
    74   also have "(\<Union> i::nat. {..real i}) = space M"
    75     by (auto simp: borel_UNIV intro: real_arch_simple)
    75     by (auto simp: borel_UNIV intro: real_arch_simple)
    76   finally show ?thesis .
    76   finally show ?thesis .
    77 qed
    77 qed
    78 
    78 
    79 lemma cdf_lim_at_top: "(cdf M \<longlongrightarrow> measure M (space M)) at_top" 
    79 lemma cdf_lim_at_top: "(cdf M \<longlongrightarrow> measure M (space M)) at_top"
    80   by (rule tendsto_at_topI_sequentially_real)
    80   by (rule tendsto_at_topI_sequentially_real)
    81      (simp_all add: mono_def cdf_nondecreasing cdf_lim_infty)
    81      (simp_all add: mono_def cdf_nondecreasing cdf_lim_infty)
    82 
    82 
    83 lemma cdf_lim_neg_infty: "((\<lambda>i. cdf M (- real i)) \<longlonglongrightarrow> 0)" 
    83 lemma cdf_lim_neg_infty: "((\<lambda>i. cdf M (- real i)) \<longlonglongrightarrow> 0)"
    84 proof -
    84 proof -
    85   have "(\<lambda>i. cdf M (- real i)) \<longlonglongrightarrow> measure M (\<Inter> i::nat. {.. - real i })"
    85   have "(\<lambda>i. cdf M (- real i)) \<longlonglongrightarrow> measure M (\<Inter> i::nat. {.. - real i })"
    86     unfolding cdf_def by (rule finite_Lim_measure_decseq) (auto simp: decseq_def)
    86     unfolding cdf_def by (rule finite_Lim_measure_decseq) (auto simp: decseq_def)
    87   also have "(\<Inter> i::nat. {..- real i}) = {}"
    87   also have "(\<Inter> i::nat. {..- real i}) = {}"
    88     by auto (metis leD le_minus_iff reals_Archimedean2)
    88     by auto (metis leD le_minus_iff reals_Archimedean2)
    89   finally show ?thesis
    89   finally show ?thesis
    90     by simp
    90     by simp
    91 qed
    91 qed
    92 
    92 
    93 lemma cdf_lim_at_bot: "(cdf M \<longlongrightarrow> 0) at_bot"
    93 lemma cdf_lim_at_bot: "(cdf M \<longlongrightarrow> 0) at_bot"
    94 proof - 
    94 proof -
    95   have *: "((\<lambda>x :: real. - cdf M (- x)) \<longlongrightarrow> 0) at_top"
    95   have *: "((\<lambda>x :: real. - cdf M (- x)) \<longlongrightarrow> 0) at_top"
    96     by (intro tendsto_at_topI_sequentially_real monoI)
    96     by (intro tendsto_at_topI_sequentially_real monoI)
    97        (auto simp: cdf_nondecreasing cdf_lim_neg_infty tendsto_minus_cancel_left[symmetric])
    97        (auto simp: cdf_nondecreasing cdf_lim_neg_infty tendsto_minus_cancel_left[symmetric])
    98   from filterlim_compose [OF *, OF filterlim_uminus_at_top_at_bot]
    98   from filterlim_compose [OF *, OF filterlim_uminus_at_top_at_bot]
    99   show ?thesis
    99   show ?thesis
   103 lemma cdf_is_right_cont: "continuous (at_right a) (cdf M)"
   103 lemma cdf_is_right_cont: "continuous (at_right a) (cdf M)"
   104   unfolding continuous_within
   104   unfolding continuous_within
   105 proof (rule tendsto_at_right_sequentially[where b="a + 1"])
   105 proof (rule tendsto_at_right_sequentially[where b="a + 1"])
   106   fix f :: "nat \<Rightarrow> real" and x assume f: "decseq f" "f \<longlonglongrightarrow> a"
   106   fix f :: "nat \<Rightarrow> real" and x assume f: "decseq f" "f \<longlonglongrightarrow> a"
   107   then have "(\<lambda>n. cdf M (f n)) \<longlonglongrightarrow> measure M (\<Inter>i. {.. f i})"
   107   then have "(\<lambda>n. cdf M (f n)) \<longlonglongrightarrow> measure M (\<Inter>i. {.. f i})"
   108     using `decseq f` unfolding cdf_def 
   108     using `decseq f` unfolding cdf_def
   109     by (intro finite_Lim_measure_decseq) (auto simp: decseq_def)
   109     by (intro finite_Lim_measure_decseq) (auto simp: decseq_def)
   110   also have "(\<Inter>i. {.. f i}) = {.. a}"
   110   also have "(\<Inter>i. {.. f i}) = {.. a}"
   111     using decseq_le[OF f] by (auto intro: order_trans LIMSEQ_le_const[OF f(2)])
   111     using decseq_le[OF f] by (auto intro: order_trans LIMSEQ_le_const[OF f(2)])
   112   finally show "(\<lambda>n. cdf M (f n)) \<longlonglongrightarrow> cdf M a"
   112   finally show "(\<lambda>n. cdf M (f n)) \<longlonglongrightarrow> cdf M a"
   113     by (simp add: cdf_def)
   113     by (simp add: cdf_def)
   115 
   115 
   116 lemma cdf_at_left: "(cdf M \<longlongrightarrow> measure M {..<a}) (at_left a)"
   116 lemma cdf_at_left: "(cdf M \<longlongrightarrow> measure M {..<a}) (at_left a)"
   117 proof (rule tendsto_at_left_sequentially[of "a - 1"])
   117 proof (rule tendsto_at_left_sequentially[of "a - 1"])
   118   fix f :: "nat \<Rightarrow> real" and x assume f: "incseq f" "f \<longlonglongrightarrow> a" "\<And>x. f x < a" "\<And>x. a - 1 < f x"
   118   fix f :: "nat \<Rightarrow> real" and x assume f: "incseq f" "f \<longlonglongrightarrow> a" "\<And>x. f x < a" "\<And>x. a - 1 < f x"
   119   then have "(\<lambda>n. cdf M (f n)) \<longlonglongrightarrow> measure M (\<Union>i. {.. f i})"
   119   then have "(\<lambda>n. cdf M (f n)) \<longlonglongrightarrow> measure M (\<Union>i. {.. f i})"
   120     using `incseq f` unfolding cdf_def 
   120     using `incseq f` unfolding cdf_def
   121     by (intro finite_Lim_measure_incseq) (auto simp: incseq_def)
   121     by (intro finite_Lim_measure_incseq) (auto simp: incseq_def)
   122   also have "(\<Union>i. {.. f i}) = {..<a}"
   122   also have "(\<Union>i. {.. f i}) = {..<a}"
   123     by (auto dest!: order_tendstoD(1)[OF f(2)] eventually_happens'[OF sequentially_bot]
   123     by (auto dest!: order_tendstoD(1)[OF f(2)] eventually_happens'[OF sequentially_bot]
   124              intro: less_imp_le le_less_trans f(3))
   124              intro: less_imp_le le_less_trans f(3))
   125   finally show "(\<lambda>n. cdf M (f n)) \<longlonglongrightarrow> measure M {..<a}"
   125   finally show "(\<lambda>n. cdf M (f n)) \<longlonglongrightarrow> measure M {..<a}"
   137   finally show ?thesis .
   137   finally show ?thesis .
   138 qed
   138 qed
   139 
   139 
   140 lemma countable_atoms: "countable {x. measure M {x} > 0}"
   140 lemma countable_atoms: "countable {x. measure M {x} > 0}"
   141   using countable_support unfolding zero_less_measure_iff .
   141   using countable_support unfolding zero_less_measure_iff .
   142     
   142 
   143 end
   143 end
   144 
   144 
   145 locale real_distribution = prob_space M for M :: "real measure" +
   145 locale real_distribution = prob_space M for M :: "real measure" +
   146   assumes events_eq_borel [simp, measurable_cong]: "sets M = sets borel" and space_eq_univ [simp]: "space M = UNIV"
   146   assumes events_eq_borel [simp, measurable_cong]: "sets M = sets borel" and space_eq_univ [simp]: "space M = UNIV"
   147 begin
   147 begin
   153   by (subst prob_space [symmetric], rule cdf_bounded)
   153   by (subst prob_space [symmetric], rule cdf_bounded)
   154 
   154 
   155 lemma cdf_lim_infty_prob: "(\<lambda>i. cdf M (real i)) \<longlonglongrightarrow> 1"
   155 lemma cdf_lim_infty_prob: "(\<lambda>i. cdf M (real i)) \<longlonglongrightarrow> 1"
   156   by (subst prob_space [symmetric], rule cdf_lim_infty)
   156   by (subst prob_space [symmetric], rule cdf_lim_infty)
   157 
   157 
   158 lemma cdf_lim_at_top_prob: "(cdf M \<longlongrightarrow> 1) at_top" 
   158 lemma cdf_lim_at_top_prob: "(cdf M \<longlongrightarrow> 1) at_top"
   159   by (subst prob_space [symmetric], rule cdf_lim_at_top)
   159   by (subst prob_space [symmetric], rule cdf_lim_at_top)
   160 
   160 
   161 lemma measurable_finite_borel [simp]:
   161 lemma measurable_finite_borel [simp]:
   162   "f \<in> borel_measurable borel \<Longrightarrow> f \<in> borel_measurable M"
   162   "f \<in> borel_measurable borel \<Longrightarrow> f \<in> borel_measurable M"
   163   by (rule borel_measurable_subalgebra[where N=borel]) auto
   163   by (rule borel_measurable_subalgebra[where N=borel]) auto
   198   Int_stable_def)
   198   Int_stable_def)
   199 
   199 
   200 lemma real_distribution_interval_measure:
   200 lemma real_distribution_interval_measure:
   201   fixes F :: "real \<Rightarrow> real"
   201   fixes F :: "real \<Rightarrow> real"
   202   assumes nondecF : "\<And> x y. x \<le> y \<Longrightarrow> F x \<le> F y" and
   202   assumes nondecF : "\<And> x y. x \<le> y \<Longrightarrow> F x \<le> F y" and
   203     right_cont_F : "\<And>a. continuous (at_right a) F" and 
   203     right_cont_F : "\<And>a. continuous (at_right a) F" and
   204     lim_F_at_bot : "(F \<longlongrightarrow> 0) at_bot" and
   204     lim_F_at_bot : "(F \<longlongrightarrow> 0) at_bot" and
   205     lim_F_at_top : "(F \<longlongrightarrow> 1) at_top"
   205     lim_F_at_top : "(F \<longlongrightarrow> 1) at_top"
   206   shows "real_distribution (interval_measure F)"
   206   shows "real_distribution (interval_measure F)"
   207 proof -
   207 proof -
   208   let ?F = "interval_measure F"
   208   let ?F = "interval_measure F"
   209   interpret prob_space ?F
   209   interpret prob_space ?F
   210   proof
   210   proof
   211     have "ereal (1 - 0) = (SUP i::nat. ereal (F (real i) - F (- real i)))"
   211     have "ennreal (1 - 0) = (SUP i::nat. ennreal (F (real i) - F (- real i)))"
   212       by (intro LIMSEQ_unique[OF _ LIMSEQ_SUP] lim_ereal[THEN iffD2] tendsto_intros
   212       by (intro LIMSEQ_unique[OF _ LIMSEQ_SUP] tendsto_ennrealI tendsto_intros
   213          lim_F_at_bot[THEN filterlim_compose] lim_F_at_top[THEN filterlim_compose]
   213                 lim_F_at_bot[THEN filterlim_compose] lim_F_at_top[THEN filterlim_compose]
   214          lim_F_at_bot[THEN filterlim_compose] filterlim_real_sequentially
   214                 lim_F_at_bot[THEN filterlim_compose] filterlim_real_sequentially
   215          filterlim_uminus_at_top[THEN iffD1])
   215                 filterlim_uminus_at_top[THEN iffD1])
   216          (auto simp: incseq_def intro!: diff_mono nondecF)
   216          (auto simp: incseq_def nondecF intro!: diff_mono)
   217     also have "\<dots> = (SUP i::nat. emeasure ?F {- real i<..real i})"
   217     also have "\<dots> = (SUP i::nat. emeasure ?F {- real i<..real i})"
   218       by (subst emeasure_interval_measure_Ioc) (simp_all add: nondecF right_cont_F)
   218       by (subst emeasure_interval_measure_Ioc) (simp_all add: nondecF right_cont_F)
   219     also have "\<dots> = emeasure ?F (\<Union>i::nat. {- real i<..real i})"
   219     also have "\<dots> = emeasure ?F (\<Union>i::nat. {- real i<..real i})"
   220       by (rule SUP_emeasure_incseq) (auto simp: incseq_def)
   220       by (rule SUP_emeasure_incseq) (auto simp: incseq_def)
   221     also have "(\<Union>i. {- real (i::nat)<..real i}) = space ?F"
   221     also have "(\<Union>i. {- real (i::nat)<..real i}) = space ?F"
   228 qed
   228 qed
   229 
   229 
   230 lemma cdf_interval_measure:
   230 lemma cdf_interval_measure:
   231   fixes F :: "real \<Rightarrow> real"
   231   fixes F :: "real \<Rightarrow> real"
   232   assumes nondecF : "\<And> x y. x \<le> y \<Longrightarrow> F x \<le> F y" and
   232   assumes nondecF : "\<And> x y. x \<le> y \<Longrightarrow> F x \<le> F y" and
   233     right_cont_F : "\<And>a. continuous (at_right a) F" and 
   233     right_cont_F : "\<And>a. continuous (at_right a) F" and
   234     lim_F_at_bot : "(F \<longlongrightarrow> 0) at_bot" and
   234     lim_F_at_bot : "(F \<longlongrightarrow> 0) at_bot" and
   235     lim_F_at_top : "(F \<longlongrightarrow> 1) at_top"
   235     lim_F_at_top : "(F \<longlongrightarrow> 1) at_top"
   236   shows "cdf (interval_measure F) = F"
   236   shows "cdf (interval_measure F) = F"
   237   unfolding cdf_def
   237   unfolding cdf_def
   238 proof (intro ext)
   238 proof (intro ext)