| 62083 |      1 | (*
 | 
|  |      2 |   Title    : Distribution_Functions.thy
 | 
|  |      3 |   Authors  : Jeremy Avigad and Luke Serafin
 | 
|  |      4 | *)
 | 
|  |      5 | 
 | 
|  |      6 | section \<open>Distribution Functions\<close>
 | 
|  |      7 | 
 | 
|  |      8 | text \<open>
 | 
|  |      9 | Shows that the cumulative distribution function (cdf) of a distribution (a measure on the reals) is 
 | 
|  |     10 | nondecreasing and right continuous, which tends to 0 and 1 in either direction.
 | 
|  |     11 | 
 | 
|  |     12 | Conversely, every such function is the cdf of a unique distribution. This direction defines the 
 | 
|  |     13 | measure in the obvious way on half-open intervals, and then applies the Caratheodory extension 
 | 
|  |     14 | theorem.
 | 
|  |     15 | \<close>
 | 
|  |     16 | 
 | 
|  |     17 | (* TODO: the locales "finite_borel_measure" and "real_distribution" are defined here, but maybe they
 | 
|  |     18 |  should be somewhere else. *)
 | 
|  |     19 | 
 | 
|  |     20 | theory Distribution_Functions
 | 
|  |     21 |   imports Probability_Measure "~~/src/HOL/Library/ContNotDenum"
 | 
|  |     22 | begin
 | 
|  |     23 | 
 | 
|  |     24 | lemma UN_Ioc_eq_UNIV: "(\<Union>n. { -real n <.. real n}) = UNIV"
 | 
|  |     25 |   by auto
 | 
|  |     26 |      (metis le_less_trans minus_minus neg_less_iff_less not_le real_arch_simple
 | 
|  |     27 |             of_nat_0_le_iff reals_Archimedean2)
 | 
|  |     28 | 
 | 
|  |     29 | subsection {* Properties of cdf's *}
 | 
|  |     30 | 
 | 
|  |     31 | definition
 | 
|  |     32 |   cdf :: "real measure \<Rightarrow> real \<Rightarrow> real"
 | 
|  |     33 | where
 | 
|  |     34 |   "cdf M \<equiv> \<lambda>x. measure M {..x}"
 | 
|  |     35 | 
 | 
|  |     36 | lemma cdf_def2: "cdf M x = measure M {..x}"
 | 
|  |     37 |   by (simp add: cdf_def)
 | 
|  |     38 | 
 | 
|  |     39 | locale finite_borel_measure = finite_measure M for M :: "real measure" +
 | 
|  |     40 |   assumes M_super_borel: "sets borel \<subseteq> sets M"
 | 
|  |     41 | begin
 | 
|  |     42 | 
 | 
|  |     43 | lemma sets_M[intro]: "a \<in> sets borel \<Longrightarrow> a \<in> sets M"
 | 
|  |     44 |   using M_super_borel by auto
 | 
|  |     45 | 
 | 
|  |     46 | lemma cdf_diff_eq: 
 | 
|  |     47 |   assumes "x < y"
 | 
|  |     48 |   shows "cdf M y - cdf M x = measure M {x<..y}"
 | 
|  |     49 | proof -
 | 
|  |     50 |   from assms have *: "{..x} \<union> {x<..y} = {..y}" by auto
 | 
|  |     51 |   have "measure M {..y} = measure M {..x} + measure M {x<..y}"
 | 
|  |     52 |     by (subst finite_measure_Union [symmetric], auto simp add: *)
 | 
|  |     53 |   thus ?thesis
 | 
|  |     54 |     unfolding cdf_def by auto
 | 
|  |     55 | qed
 | 
|  |     56 | 
 | 
|  |     57 | lemma cdf_nondecreasing: "x \<le> y \<Longrightarrow> cdf M x \<le> cdf M y"
 | 
|  |     58 |   unfolding cdf_def by (auto intro!: finite_measure_mono)
 | 
|  |     59 | 
 | 
|  |     60 | lemma borel_UNIV: "space M = UNIV"
 | 
|  |     61 |  by (metis in_mono sets.sets_into_space space_in_borel top_le M_super_borel)
 | 
|  |     62 |  
 | 
|  |     63 | lemma cdf_nonneg: "cdf M x \<ge> 0"
 | 
|  |     64 |   unfolding cdf_def by (rule measure_nonneg)
 | 
|  |     65 | 
 | 
|  |     66 | lemma cdf_bounded: "cdf M x \<le> measure M (space M)"
 | 
|  |     67 |   unfolding cdf_def using assms by (intro bounded_measure)
 | 
|  |     68 | 
 | 
|  |     69 | lemma cdf_lim_infty:
 | 
|  |     70 |   "((\<lambda>i. cdf M (real i)) \<longlonglongrightarrow> measure M (space M))"
 | 
|  |     71 | proof -
 | 
|  |     72 |   have "(\<lambda>i. cdf M (real i)) \<longlonglongrightarrow> measure M (\<Union> i::nat. {..real i})"
 | 
|  |     73 |     unfolding cdf_def by (rule finite_Lim_measure_incseq) (auto simp: incseq_def)
 | 
|  |     74 |   also have "(\<Union> i::nat. {..real i}) = space M"
 | 
|  |     75 |     by (auto simp: borel_UNIV intro: real_arch_simple)
 | 
|  |     76 |   finally show ?thesis .
 | 
|  |     77 | qed
 | 
|  |     78 | 
 | 
|  |     79 | lemma cdf_lim_at_top: "(cdf M \<longlongrightarrow> measure M (space M)) at_top" 
 | 
|  |     80 |   by (rule tendsto_at_topI_sequentially_real)
 | 
|  |     81 |      (simp_all add: mono_def cdf_nondecreasing cdf_lim_infty)
 | 
|  |     82 | 
 | 
|  |     83 | lemma cdf_lim_neg_infty: "((\<lambda>i. cdf M (- real i)) \<longlonglongrightarrow> 0)" 
 | 
|  |     84 | proof -
 | 
|  |     85 |   have "(\<lambda>i. cdf M (- real i)) \<longlonglongrightarrow> measure M (\<Inter> i::nat. {.. - real i })"
 | 
|  |     86 |     unfolding cdf_def by (rule finite_Lim_measure_decseq) (auto simp: decseq_def)
 | 
|  |     87 |   also have "(\<Inter> i::nat. {..- real i}) = {}"
 | 
|  |     88 |     by auto (metis leD le_minus_iff reals_Archimedean2)
 | 
|  |     89 |   finally show ?thesis
 | 
|  |     90 |     by simp
 | 
|  |     91 | qed
 | 
|  |     92 | 
 | 
|  |     93 | lemma cdf_lim_at_bot: "(cdf M \<longlongrightarrow> 0) at_bot"
 | 
|  |     94 | proof - 
 | 
|  |     95 |   have *: "((\<lambda>x :: real. - cdf M (- x)) \<longlongrightarrow> 0) at_top"
 | 
|  |     96 |     by (intro tendsto_at_topI_sequentially_real monoI)
 | 
|  |     97 |        (auto simp: cdf_nondecreasing cdf_lim_neg_infty tendsto_minus_cancel_left[symmetric])
 | 
|  |     98 |   from filterlim_compose [OF *, OF filterlim_uminus_at_top_at_bot]
 | 
|  |     99 |   show ?thesis
 | 
|  |    100 |     unfolding tendsto_minus_cancel_left[symmetric] by simp
 | 
|  |    101 | qed
 | 
|  |    102 | 
 | 
|  |    103 | lemma cdf_is_right_cont: "continuous (at_right a) (cdf M)"
 | 
|  |    104 |   unfolding continuous_within
 | 
|  |    105 | proof (rule tendsto_at_right_sequentially[where b="a + 1"])
 | 
|  |    106 |   fix f :: "nat \<Rightarrow> real" and x assume f: "decseq f" "f \<longlonglongrightarrow> a"
 | 
|  |    107 |   then have "(\<lambda>n. cdf M (f n)) \<longlonglongrightarrow> measure M (\<Inter>i. {.. f i})"
 | 
|  |    108 |     using `decseq f` unfolding cdf_def 
 | 
|  |    109 |     by (intro finite_Lim_measure_decseq) (auto simp: decseq_def)
 | 
|  |    110 |   also have "(\<Inter>i. {.. f i}) = {.. a}"
 | 
|  |    111 |     using decseq_le[OF f] by (auto intro: order_trans LIMSEQ_le_const[OF f(2)])
 | 
|  |    112 |   finally show "(\<lambda>n. cdf M (f n)) \<longlonglongrightarrow> cdf M a"
 | 
|  |    113 |     by (simp add: cdf_def)
 | 
|  |    114 | qed simp
 | 
|  |    115 | 
 | 
|  |    116 | lemma cdf_at_left: "(cdf M \<longlongrightarrow> measure M {..<a}) (at_left a)"
 | 
|  |    117 | proof (rule tendsto_at_left_sequentially[of "a - 1"])
 | 
|  |    118 |   fix f :: "nat \<Rightarrow> real" and x assume f: "incseq f" "f \<longlonglongrightarrow> a" "\<And>x. f x < a" "\<And>x. a - 1 < f x"
 | 
|  |    119 |   then have "(\<lambda>n. cdf M (f n)) \<longlonglongrightarrow> measure M (\<Union>i. {.. f i})"
 | 
|  |    120 |     using `incseq f` unfolding cdf_def 
 | 
|  |    121 |     by (intro finite_Lim_measure_incseq) (auto simp: incseq_def)
 | 
|  |    122 |   also have "(\<Union>i. {.. f i}) = {..<a}"
 | 
|  |    123 |     by (auto dest!: order_tendstoD(1)[OF f(2)] eventually_happens'[OF sequentially_bot]
 | 
|  |    124 |              intro: less_imp_le le_less_trans f(3))
 | 
|  |    125 |   finally show "(\<lambda>n. cdf M (f n)) \<longlonglongrightarrow> measure M {..<a}"
 | 
|  |    126 |     by (simp add: cdf_def)
 | 
|  |    127 | qed auto
 | 
|  |    128 | 
 | 
|  |    129 | lemma isCont_cdf: "isCont (cdf M) x \<longleftrightarrow> measure M {x} = 0"
 | 
|  |    130 | proof -
 | 
|  |    131 |   have "isCont (cdf M) x \<longleftrightarrow> cdf M x = measure M {..<x}"
 | 
|  |    132 |     by (auto simp: continuous_at_split cdf_is_right_cont continuous_within[where s="{..< _}"]
 | 
|  |    133 |                    cdf_at_left tendsto_unique[OF _ cdf_at_left])
 | 
|  |    134 |   also have "cdf M x = measure M {..<x} \<longleftrightarrow> measure M {x} = 0"
 | 
|  |    135 |     unfolding cdf_def ivl_disj_un(2)[symmetric]
 | 
|  |    136 |     by (subst finite_measure_Union) auto
 | 
|  |    137 |   finally show ?thesis .
 | 
|  |    138 | qed
 | 
|  |    139 | 
 | 
|  |    140 | lemma countable_atoms: "countable {x. measure M {x} > 0}"
 | 
|  |    141 |   using countable_support unfolding zero_less_measure_iff .
 | 
|  |    142 |     
 | 
|  |    143 | end
 | 
|  |    144 | 
 | 
|  |    145 | locale real_distribution = prob_space M for M :: "real measure" +
 | 
|  |    146 |   assumes events_eq_borel [simp, measurable_cong]: "sets M = sets borel" and space_eq_univ [simp]: "space M = UNIV"
 | 
|  |    147 | begin
 | 
|  |    148 | 
 | 
|  |    149 | sublocale finite_borel_measure M
 | 
|  |    150 |   by standard auto
 | 
|  |    151 | 
 | 
|  |    152 | lemma cdf_bounded_prob: "\<And>x. cdf M x \<le> 1"
 | 
|  |    153 |   by (subst prob_space [symmetric], rule cdf_bounded)
 | 
|  |    154 | 
 | 
|  |    155 | lemma cdf_lim_infty_prob: "(\<lambda>i. cdf M (real i)) \<longlonglongrightarrow> 1"
 | 
|  |    156 |   by (subst prob_space [symmetric], rule cdf_lim_infty)
 | 
|  |    157 | 
 | 
|  |    158 | lemma cdf_lim_at_top_prob: "(cdf M \<longlongrightarrow> 1) at_top" 
 | 
|  |    159 |   by (subst prob_space [symmetric], rule cdf_lim_at_top)
 | 
|  |    160 | 
 | 
|  |    161 | lemma measurable_finite_borel [simp]:
 | 
|  |    162 |   "f \<in> borel_measurable borel \<Longrightarrow> f \<in> borel_measurable M"
 | 
|  |    163 |   by (rule borel_measurable_subalgebra[where N=borel]) auto
 | 
|  |    164 | 
 | 
|  |    165 | end
 | 
|  |    166 | 
 | 
|  |    167 | lemma (in prob_space) real_distribution_distr [intro, simp]:
 | 
|  |    168 |   "random_variable borel X \<Longrightarrow> real_distribution (distr M borel X)"
 | 
|  |    169 |   unfolding real_distribution_def real_distribution_axioms_def by (auto intro!: prob_space_distr)
 | 
|  |    170 | 
 | 
|  |    171 | subsection {* uniqueness *}
 | 
|  |    172 | 
 | 
|  |    173 | lemma (in real_distribution) emeasure_Ioc:
 | 
|  |    174 |   assumes "a \<le> b" shows "emeasure M {a <.. b} = cdf M b - cdf M a"
 | 
|  |    175 | proof -
 | 
|  |    176 |   have "{a <.. b} = {..b} - {..a}"
 | 
|  |    177 |     by auto
 | 
|  |    178 |   with `a \<le> b` show ?thesis
 | 
|  |    179 |     by (simp add: emeasure_eq_measure finite_measure_Diff cdf_def)
 | 
|  |    180 | qed
 | 
|  |    181 | 
 | 
|  |    182 | lemma cdf_unique:
 | 
|  |    183 |   fixes M1 M2
 | 
|  |    184 |   assumes "real_distribution M1" and "real_distribution M2"
 | 
|  |    185 |   assumes "cdf M1 = cdf M2"
 | 
|  |    186 |   shows "M1 = M2"
 | 
|  |    187 | proof (rule measure_eqI_generator_eq[where \<Omega>=UNIV])
 | 
|  |    188 |   fix X assume "X \<in> range (\<lambda>(a, b). {a<..b::real})"
 | 
|  |    189 |   then obtain a b where Xeq: "X = {a<..b}" by auto
 | 
|  |    190 |   then show "emeasure M1 X = emeasure M2 X"
 | 
|  |    191 |     by (cases "a \<le> b")
 | 
|  |    192 |        (simp_all add: assms(1,2)[THEN real_distribution.emeasure_Ioc] assms(3))
 | 
|  |    193 | next
 | 
|  |    194 |   show "(\<Union>i. {- real (i::nat)<..real i}) = UNIV"
 | 
|  |    195 |     by (rule UN_Ioc_eq_UNIV)
 | 
|  |    196 | qed (auto simp: real_distribution.emeasure_Ioc[OF assms(1)]
 | 
|  |    197 |   assms(1,2)[THEN real_distribution.events_eq_borel] borel_sigma_sets_Ioc
 | 
|  |    198 |   Int_stable_def)
 | 
|  |    199 | 
 | 
|  |    200 | lemma real_distribution_interval_measure:
 | 
|  |    201 |   fixes F :: "real \<Rightarrow> real"
 | 
|  |    202 |   assumes nondecF : "\<And> x y. x \<le> y \<Longrightarrow> F x \<le> F y" and
 | 
|  |    203 |     right_cont_F : "\<And>a. continuous (at_right a) F" and 
 | 
|  |    204 |     lim_F_at_bot : "(F \<longlongrightarrow> 0) at_bot" and
 | 
|  |    205 |     lim_F_at_top : "(F \<longlongrightarrow> 1) at_top"
 | 
|  |    206 |   shows "real_distribution (interval_measure F)"
 | 
|  |    207 | proof -
 | 
|  |    208 |   let ?F = "interval_measure F"
 | 
|  |    209 |   interpret prob_space ?F
 | 
|  |    210 |   proof
 | 
|  |    211 |     have "ereal (1 - 0) = (SUP i::nat. ereal (F (real i) - F (- real i)))"
 | 
|  |    212 |       by (intro LIMSEQ_unique[OF _ LIMSEQ_SUP] lim_ereal[THEN iffD2] tendsto_intros
 | 
|  |    213 |          lim_F_at_bot[THEN filterlim_compose] lim_F_at_top[THEN filterlim_compose]
 | 
|  |    214 |          lim_F_at_bot[THEN filterlim_compose] filterlim_real_sequentially
 | 
|  |    215 |          filterlim_uminus_at_top[THEN iffD1])
 | 
|  |    216 |          (auto simp: incseq_def intro!: diff_mono nondecF)
 | 
|  |    217 |     also have "\<dots> = (SUP i::nat. emeasure ?F {- real i<..real i})"
 | 
|  |    218 |       by (subst emeasure_interval_measure_Ioc) (simp_all add: nondecF right_cont_F)
 | 
|  |    219 |     also have "\<dots> = emeasure ?F (\<Union>i::nat. {- real i<..real i})"
 | 
|  |    220 |       by (rule SUP_emeasure_incseq) (auto simp: incseq_def)
 | 
|  |    221 |     also have "(\<Union>i. {- real (i::nat)<..real i}) = space ?F"
 | 
|  |    222 |       by (simp add: UN_Ioc_eq_UNIV)
 | 
|  |    223 |     finally show "emeasure ?F (space ?F) = 1"
 | 
|  |    224 |       by (simp add: one_ereal_def)
 | 
|  |    225 |   qed
 | 
|  |    226 |   show ?thesis
 | 
|  |    227 |     proof qed simp_all
 | 
|  |    228 | qed
 | 
|  |    229 | 
 | 
|  |    230 | lemma cdf_interval_measure:
 | 
|  |    231 |   fixes F :: "real \<Rightarrow> real"
 | 
|  |    232 |   assumes nondecF : "\<And> x y. x \<le> y \<Longrightarrow> F x \<le> F y" and
 | 
|  |    233 |     right_cont_F : "\<And>a. continuous (at_right a) F" and 
 | 
|  |    234 |     lim_F_at_bot : "(F \<longlongrightarrow> 0) at_bot" and
 | 
|  |    235 |     lim_F_at_top : "(F \<longlongrightarrow> 1) at_top"
 | 
|  |    236 |   shows "cdf (interval_measure F) = F"
 | 
|  |    237 |   unfolding cdf_def
 | 
|  |    238 | proof (intro ext)
 | 
|  |    239 |   interpret real_distribution "interval_measure F"
 | 
|  |    240 |     by (rule real_distribution_interval_measure) fact+
 | 
|  |    241 |   fix x
 | 
|  |    242 |   have "F x - 0 = measure (interval_measure F) (\<Union>i::nat. {-real i <.. x})"
 | 
|  |    243 |   proof (intro LIMSEQ_unique[OF _ finite_Lim_measure_incseq])
 | 
|  |    244 |     have "(\<lambda>i. F x - F (- real i)) \<longlonglongrightarrow> F x - 0"
 | 
|  |    245 |       by (intro tendsto_intros lim_F_at_bot[THEN filterlim_compose] filterlim_real_sequentially
 | 
|  |    246 |                 filterlim_uminus_at_top[THEN iffD1])
 | 
|  |    247 |     then show "(\<lambda>i. measure (interval_measure F) {- real i<..x}) \<longlonglongrightarrow> F x - 0"
 | 
|  |    248 |       apply (rule filterlim_cong[OF refl refl, THEN iffD1, rotated])
 | 
|  |    249 |       apply (rule eventually_sequentiallyI[where c="nat (ceiling (- x))"])
 | 
|  |    250 |       apply (simp add: measure_interval_measure_Ioc right_cont_F nondecF)
 | 
|  |    251 |       done
 | 
|  |    252 |   qed (auto simp: incseq_def)
 | 
|  |    253 |   also have "(\<Union>i::nat. {-real i <.. x}) = {..x}"
 | 
|  |    254 |     by auto (metis minus_minus neg_less_iff_less reals_Archimedean2)
 | 
|  |    255 |   finally show "measure (interval_measure F) {..x} = F x"
 | 
|  |    256 |     by simp
 | 
|  |    257 | qed
 | 
|  |    258 | 
 | 
|  |    259 | end
 |