src/HOL/Number_Theory/Eratosthenes.thy
author nipkow
Mon Oct 17 11:46:22 2016 +0200 (2016-10-17)
changeset 64267 b9a1486e79be
parent 64243 aee949f6642d
child 65417 fc41a5650fb1
permissions -rw-r--r--
setsum -> sum
     1 (*  Title:      HOL/Number_Theory/Eratosthenes.thy
     2     Author:     Florian Haftmann, TU Muenchen
     3 *)
     4 
     5 section \<open>The sieve of Eratosthenes\<close>
     6 
     7 theory Eratosthenes
     8 imports Main Primes
     9 begin
    10 
    11 
    12 subsection \<open>Preliminary: strict divisibility\<close>
    13 
    14 context dvd
    15 begin
    16 
    17 abbreviation dvd_strict :: "'a \<Rightarrow> 'a \<Rightarrow> bool" (infixl "dvd'_strict" 50)
    18 where
    19   "b dvd_strict a \<equiv> b dvd a \<and> \<not> a dvd b"
    20 
    21 end
    22 
    23 
    24 subsection \<open>Main corpus\<close>
    25 
    26 text \<open>The sieve is modelled as a list of booleans, where @{const False} means \emph{marked out}.\<close>
    27 
    28 type_synonym marks = "bool list"
    29 
    30 definition numbers_of_marks :: "nat \<Rightarrow> marks \<Rightarrow> nat set"
    31 where
    32   "numbers_of_marks n bs = fst ` {x \<in> set (enumerate n bs). snd x}"
    33 
    34 lemma numbers_of_marks_simps [simp, code]:
    35   "numbers_of_marks n [] = {}"
    36   "numbers_of_marks n (True # bs) = insert n (numbers_of_marks (Suc n) bs)"
    37   "numbers_of_marks n (False # bs) = numbers_of_marks (Suc n) bs"
    38   by (auto simp add: numbers_of_marks_def intro!: image_eqI)
    39 
    40 lemma numbers_of_marks_Suc:
    41   "numbers_of_marks (Suc n) bs = Suc ` numbers_of_marks n bs"
    42   by (auto simp add: numbers_of_marks_def enumerate_Suc_eq image_iff Bex_def)
    43 
    44 lemma numbers_of_marks_replicate_False [simp]:
    45   "numbers_of_marks n (replicate m False) = {}"
    46   by (auto simp add: numbers_of_marks_def enumerate_replicate_eq)
    47 
    48 lemma numbers_of_marks_replicate_True [simp]:
    49   "numbers_of_marks n (replicate m True) = {n..<n+m}"
    50   by (auto simp add: numbers_of_marks_def enumerate_replicate_eq image_def)
    51 
    52 lemma in_numbers_of_marks_eq:
    53   "m \<in> numbers_of_marks n bs \<longleftrightarrow> m \<in> {n..<n + length bs} \<and> bs ! (m - n)"
    54   by (simp add: numbers_of_marks_def in_set_enumerate_eq image_iff add.commute)
    55 
    56 lemma sorted_list_of_set_numbers_of_marks:
    57   "sorted_list_of_set (numbers_of_marks n bs) = map fst (filter snd (enumerate n bs))"
    58   by (auto simp add: numbers_of_marks_def distinct_map
    59     intro!: sorted_filter distinct_filter inj_onI sorted_distinct_set_unique)
    60 
    61 
    62 text \<open>Marking out multiples in a sieve\<close>
    63 
    64 definition mark_out :: "nat \<Rightarrow> marks \<Rightarrow> marks"
    65 where
    66   "mark_out n bs = map (\<lambda>(q, b). b \<and> \<not> Suc n dvd Suc (Suc q)) (enumerate n bs)"
    67 
    68 lemma mark_out_Nil [simp]: "mark_out n [] = []"
    69   by (simp add: mark_out_def)
    70 
    71 lemma length_mark_out [simp]: "length (mark_out n bs) = length bs"
    72   by (simp add: mark_out_def)
    73 
    74 lemma numbers_of_marks_mark_out:
    75     "numbers_of_marks n (mark_out m bs) = {q \<in> numbers_of_marks n bs. \<not> Suc m dvd Suc q - n}"
    76   by (auto simp add: numbers_of_marks_def mark_out_def in_set_enumerate_eq image_iff
    77     nth_enumerate_eq less_eq_dvd_minus)
    78 
    79 
    80 text \<open>Auxiliary operation for efficient implementation\<close>
    81 
    82 definition mark_out_aux :: "nat \<Rightarrow> nat \<Rightarrow> marks \<Rightarrow> marks"
    83 where
    84   "mark_out_aux n m bs =
    85     map (\<lambda>(q, b). b \<and> (q < m + n \<or> \<not> Suc n dvd Suc (Suc q) + (n - m mod Suc n))) (enumerate n bs)"
    86 
    87 lemma mark_out_code [code]: "mark_out n bs = mark_out_aux n n bs"
    88 proof -
    89   have aux: False
    90     if A: "Suc n dvd Suc (Suc a)"
    91     and B: "a < n + n"
    92     and C: "n \<le> a"
    93     for a
    94   proof (cases "n = 0")
    95     case True
    96     with A B C show ?thesis by simp
    97   next
    98     case False
    99     define m where "m = Suc n"
   100     then have "m > 0" by simp
   101     from False have "n > 0" by simp
   102     from A obtain q where q: "Suc (Suc a) = Suc n * q" by (rule dvdE)
   103     have "q > 0"
   104     proof (rule ccontr)
   105       assume "\<not> q > 0"
   106       with q show False by simp
   107     qed
   108     with \<open>n > 0\<close> have "Suc n * q \<ge> 2" by (auto simp add: gr0_conv_Suc)
   109     with q have a: "a = Suc n * q - 2" by simp
   110     with B have "q + n * q < n + n + 2" by auto
   111     then have "m * q < m * 2" by (simp add: m_def)
   112     with \<open>m > 0\<close> have "q < 2" by simp
   113     with \<open>q > 0\<close> have "q = 1" by simp
   114     with a have "a = n - 1" by simp
   115     with \<open>n > 0\<close> C show False by simp
   116   qed
   117   show ?thesis
   118     by (auto simp add: mark_out_def mark_out_aux_def in_set_enumerate_eq intro: aux)
   119 qed
   120 
   121 lemma mark_out_aux_simps [simp, code]:
   122   "mark_out_aux n m [] = []"
   123   "mark_out_aux n 0 (b # bs) = False # mark_out_aux n n bs"
   124   "mark_out_aux n (Suc m) (b # bs) = b # mark_out_aux n m bs"
   125 proof goal_cases
   126   case 1
   127   show ?case
   128     by (simp add: mark_out_aux_def)
   129 next
   130   case 2
   131   show ?case
   132     by (auto simp add: mark_out_code [symmetric] mark_out_aux_def mark_out_def
   133       enumerate_Suc_eq in_set_enumerate_eq less_eq_dvd_minus)
   134 next
   135   case 3
   136   { define v where "v = Suc m"
   137     define w where "w = Suc n"
   138     fix q
   139     assume "m + n \<le> q"
   140     then obtain r where q: "q = m + n + r" by (auto simp add: le_iff_add)
   141     { fix u
   142       from w_def have "u mod w < w" by simp
   143       then have "u + (w - u mod w) = w + (u - u mod w)"
   144         by simp
   145       then have "u + (w - u mod w) = w + u div w * w"
   146         by (simp add: minus_mod_eq_div_mult)
   147     }
   148     then have "w dvd v + w + r + (w - v mod w) \<longleftrightarrow> w dvd m + w + r + (w - m mod w)"
   149       by (simp add: add.assoc add.left_commute [of m] add.left_commute [of v]
   150         dvd_add_left_iff dvd_add_right_iff)
   151     moreover from q have "Suc q = m + w + r" by (simp add: w_def)
   152     moreover from q have "Suc (Suc q) = v + w + r" by (simp add: v_def w_def)
   153     ultimately have "w dvd Suc (Suc (q + (w - v mod w))) \<longleftrightarrow> w dvd Suc (q + (w - m mod w))"
   154       by (simp only: add_Suc [symmetric])
   155     then have "Suc n dvd Suc (Suc (Suc (q + n) - Suc m mod Suc n)) \<longleftrightarrow>
   156       Suc n dvd Suc (Suc (q + n - m mod Suc n))"
   157       by (simp add: v_def w_def Suc_diff_le trans_le_add2)
   158   }
   159   then show ?case
   160     by (auto simp add: mark_out_aux_def
   161       enumerate_Suc_eq in_set_enumerate_eq not_less)
   162 qed
   163 
   164 
   165 text \<open>Main entry point to sieve\<close>
   166 
   167 fun sieve :: "nat \<Rightarrow> marks \<Rightarrow> marks"
   168 where
   169   "sieve n [] = []"
   170 | "sieve n (False # bs) = False # sieve (Suc n) bs"
   171 | "sieve n (True # bs) = True # sieve (Suc n) (mark_out n bs)"
   172 
   173 text \<open>
   174   There are the following possible optimisations here:
   175 
   176   \begin{itemize}
   177 
   178     \item @{const sieve} can abort as soon as @{term n} is too big to let
   179       @{const mark_out} have any effect.
   180 
   181     \item Search for further primes can be given up as soon as the search
   182       position exceeds the square root of the maximum candidate.
   183 
   184   \end{itemize}
   185 
   186   This is left as an constructive exercise to the reader.
   187 \<close>
   188 
   189 lemma numbers_of_marks_sieve:
   190   "numbers_of_marks (Suc n) (sieve n bs) =
   191     {q \<in> numbers_of_marks (Suc n) bs. \<forall>m \<in> numbers_of_marks (Suc n) bs. \<not> m dvd_strict q}"
   192 proof (induct n bs rule: sieve.induct)
   193   case 1
   194   show ?case by simp
   195 next
   196   case 2
   197   then show ?case by simp
   198 next
   199   case (3 n bs)
   200   have aux: "n \<in> Suc ` M \<longleftrightarrow> n > 0 \<and> n - 1 \<in> M" (is "?lhs \<longleftrightarrow> ?rhs") for M n
   201   proof
   202     show ?rhs if ?lhs using that by auto
   203     show ?lhs if ?rhs
   204     proof -
   205       from that have "n > 0" and "n - 1 \<in> M" by auto
   206       then have "Suc (n - 1) \<in> Suc ` M" by blast
   207       with \<open>n > 0\<close> show "n \<in> Suc ` M" by simp
   208     qed
   209   qed
   210   have aux1: False if "Suc (Suc n) \<le> m" and "m dvd Suc n" for m :: nat
   211   proof -
   212     from \<open>m dvd Suc n\<close> obtain q where "Suc n = m * q" ..
   213     with \<open>Suc (Suc n) \<le> m\<close> have "Suc (m * q) \<le> m" by simp
   214     then have "m * q < m" by arith
   215     then have "q = 0" by simp
   216     with \<open>Suc n = m * q\<close> show ?thesis by simp
   217   qed
   218   have aux2: "m dvd q"
   219     if 1: "\<forall>q>0. 1 < q \<longrightarrow> Suc n < q \<longrightarrow> q \<le> Suc (n + length bs) \<longrightarrow>
   220       bs ! (q - Suc (Suc n)) \<longrightarrow> \<not> Suc n dvd q \<longrightarrow> q dvd m \<longrightarrow> m dvd q"
   221     and 2: "\<not> Suc n dvd m" "q dvd m"
   222     and 3: "Suc n < q" "q \<le> Suc (n + length bs)" "bs ! (q - Suc (Suc n))"
   223     for m q :: nat
   224   proof -
   225     from 1 have *: "\<And>q. Suc n < q \<Longrightarrow> q \<le> Suc (n + length bs) \<Longrightarrow>
   226       bs ! (q - Suc (Suc n)) \<Longrightarrow> \<not> Suc n dvd q \<Longrightarrow> q dvd m \<Longrightarrow> m dvd q"
   227       by auto
   228     from 2 have "\<not> Suc n dvd q" by (auto elim: dvdE)
   229     moreover note 3
   230     moreover note \<open>q dvd m\<close>
   231     ultimately show ?thesis by (auto intro: *)
   232   qed
   233   from 3 show ?case
   234     apply (simp_all add: numbers_of_marks_mark_out numbers_of_marks_Suc Compr_image_eq
   235       inj_image_eq_iff in_numbers_of_marks_eq Ball_def imp_conjL aux)
   236     apply safe
   237     apply (simp_all add: less_diff_conv2 le_diff_conv2 dvd_minus_self not_less)
   238     apply (clarsimp dest!: aux1)
   239     apply (simp add: Suc_le_eq less_Suc_eq_le)
   240     apply (rule aux2)
   241     apply (clarsimp dest!: aux1)+
   242     done
   243 qed
   244 
   245 
   246 text \<open>Relation of the sieve algorithm to actual primes\<close>
   247 
   248 definition primes_upto :: "nat \<Rightarrow> nat list"
   249 where
   250   "primes_upto n = sorted_list_of_set {m. m \<le> n \<and> prime m}"
   251 
   252 lemma set_primes_upto: "set (primes_upto n) = {m. m \<le> n \<and> prime m}"
   253   by (simp add: primes_upto_def)
   254 
   255 lemma sorted_primes_upto [iff]: "sorted (primes_upto n)"
   256   by (simp add: primes_upto_def)
   257 
   258 lemma distinct_primes_upto [iff]: "distinct (primes_upto n)"
   259   by (simp add: primes_upto_def)
   260 
   261 lemma set_primes_upto_sieve:
   262   "set (primes_upto n) = numbers_of_marks 2 (sieve 1 (replicate (n - 1) True))"
   263 proof -
   264   consider "n = 0 \<or> n = 1" | "n > 1" by arith
   265   then show ?thesis
   266   proof cases
   267     case 1
   268     then show ?thesis
   269       by (auto simp add: numbers_of_marks_sieve numeral_2_eq_2 set_primes_upto
   270         dest: prime_gt_Suc_0_nat)
   271   next
   272     case 2
   273     {
   274       fix m q
   275       assume "Suc (Suc 0) \<le> q"
   276         and "q < Suc n"
   277         and "m dvd q"
   278       then have "m < Suc n" by (auto dest: dvd_imp_le)
   279       assume *: "\<forall>m\<in>{Suc (Suc 0)..<Suc n}. m dvd q \<longrightarrow> q dvd m"
   280         and "m dvd q" and "m \<noteq> 1"
   281       have "m = q"
   282       proof (cases "m = 0")
   283         case True with \<open>m dvd q\<close> show ?thesis by simp
   284       next
   285         case False with \<open>m \<noteq> 1\<close> have "Suc (Suc 0) \<le> m" by arith
   286         with \<open>m < Suc n\<close> * \<open>m dvd q\<close> have "q dvd m" by simp
   287         with \<open>m dvd q\<close> show ?thesis by (simp add: dvd_antisym)
   288       qed
   289     }
   290     then have aux: "\<And>m q. Suc (Suc 0) \<le> q \<Longrightarrow>
   291       q < Suc n \<Longrightarrow>
   292       m dvd q \<Longrightarrow>
   293       \<forall>m\<in>{Suc (Suc 0)..<Suc n}. m dvd q \<longrightarrow> q dvd m \<Longrightarrow>
   294       m dvd q \<Longrightarrow> m \<noteq> q \<Longrightarrow> m = 1" by auto
   295     from 2 show ?thesis
   296       apply (auto simp add: numbers_of_marks_sieve numeral_2_eq_2 set_primes_upto
   297         dest: prime_gt_Suc_0_nat)
   298       apply (metis One_nat_def Suc_le_eq less_not_refl prime_nat_iff)
   299       apply (metis One_nat_def Suc_le_eq aux prime_nat_iff)
   300       done
   301   qed
   302 qed
   303 
   304 lemma primes_upto_sieve [code]:
   305   "primes_upto n = map fst (filter snd (enumerate 2 (sieve 1 (replicate (n - 1) True))))"
   306 proof -
   307   have "primes_upto n = sorted_list_of_set (numbers_of_marks 2 (sieve 1 (replicate (n - 1) True)))"
   308     apply (rule sorted_distinct_set_unique)
   309     apply (simp_all only: set_primes_upto_sieve numbers_of_marks_def)
   310     apply auto
   311     done
   312   then show ?thesis
   313     by (simp add: sorted_list_of_set_numbers_of_marks)
   314 qed
   315 
   316 lemma prime_in_primes_upto: "prime n \<longleftrightarrow> n \<in> set (primes_upto n)"
   317   by (simp add: set_primes_upto)
   318 
   319 
   320 subsection \<open>Application: smallest prime beyond a certain number\<close>
   321 
   322 definition smallest_prime_beyond :: "nat \<Rightarrow> nat"
   323 where
   324   "smallest_prime_beyond n = (LEAST p. prime p \<and> p \<ge> n)"
   325 
   326 lemma prime_smallest_prime_beyond [iff]: "prime (smallest_prime_beyond n)" (is ?P)
   327   and smallest_prime_beyond_le [iff]: "smallest_prime_beyond n \<ge> n" (is ?Q)
   328 proof -
   329   let ?least = "LEAST p. prime p \<and> p \<ge> n"
   330   from primes_infinite obtain q where "prime q \<and> q \<ge> n"
   331     by (metis finite_nat_set_iff_bounded_le mem_Collect_eq nat_le_linear)
   332   then have "prime ?least \<and> ?least \<ge> n"
   333     by (rule LeastI)
   334   then show ?P and ?Q
   335     by (simp_all add: smallest_prime_beyond_def)
   336 qed
   337 
   338 lemma smallest_prime_beyond_smallest: "prime p \<Longrightarrow> p \<ge> n \<Longrightarrow> smallest_prime_beyond n \<le> p"
   339   by (simp only: smallest_prime_beyond_def) (auto intro: Least_le)
   340 
   341 lemma smallest_prime_beyond_eq:
   342   "prime p \<Longrightarrow> p \<ge> n \<Longrightarrow> (\<And>q. prime q \<Longrightarrow> q \<ge> n \<Longrightarrow> q \<ge> p) \<Longrightarrow> smallest_prime_beyond n = p"
   343   by (simp only: smallest_prime_beyond_def) (auto intro: Least_equality)
   344 
   345 definition smallest_prime_between :: "nat \<Rightarrow> nat \<Rightarrow> nat option"
   346 where
   347   "smallest_prime_between m n =
   348     (if (\<exists>p. prime p \<and> m \<le> p \<and> p \<le> n) then Some (smallest_prime_beyond m) else None)"
   349 
   350 lemma smallest_prime_between_None:
   351   "smallest_prime_between m n = None \<longleftrightarrow> (\<forall>q. m \<le> q \<and> q \<le> n \<longrightarrow> \<not> prime q)"
   352   by (auto simp add: smallest_prime_between_def)
   353 
   354 lemma smallest_prime_betwen_Some:
   355   "smallest_prime_between m n = Some p \<longleftrightarrow> smallest_prime_beyond m = p \<and> p \<le> n"
   356   by (auto simp add: smallest_prime_between_def dest: smallest_prime_beyond_smallest [of _ m])
   357 
   358 lemma [code]: "smallest_prime_between m n = List.find (\<lambda>p. p \<ge> m) (primes_upto n)"
   359 proof -
   360   have "List.find (\<lambda>p. p \<ge> m) (primes_upto n) = Some (smallest_prime_beyond m)"
   361     if assms: "m \<le> p" "prime p" "p \<le> n" for p
   362   proof -
   363     define A where "A = {p. p \<le> n \<and> prime p \<and> m \<le> p}"
   364     from assms have "smallest_prime_beyond m \<le> p"
   365       by (auto intro: smallest_prime_beyond_smallest)
   366     from this \<open>p \<le> n\<close> have *: "smallest_prime_beyond m \<le> n"
   367       by (rule order_trans)
   368     from assms have ex: "\<exists>p\<le>n. prime p \<and> m \<le> p"
   369       by auto
   370     then have "finite A"
   371       by (auto simp add: A_def)
   372     with * have "Min A = smallest_prime_beyond m"
   373       by (auto simp add: A_def intro: Min_eqI smallest_prime_beyond_smallest)
   374     with ex sorted_primes_upto show ?thesis
   375       by (auto simp add: set_primes_upto sorted_find_Min A_def)
   376   qed
   377   then show ?thesis
   378     by (auto simp add: smallest_prime_between_def find_None_iff set_primes_upto
   379       intro!: sym [of _ None])
   380 qed
   381 
   382 definition smallest_prime_beyond_aux :: "nat \<Rightarrow> nat \<Rightarrow> nat"
   383 where
   384   "smallest_prime_beyond_aux k n = smallest_prime_beyond n"
   385 
   386 lemma [code]:
   387   "smallest_prime_beyond_aux k n =
   388     (case smallest_prime_between n (k * n) of
   389       Some p \<Rightarrow> p
   390     | None \<Rightarrow> smallest_prime_beyond_aux (Suc k) n)"
   391   by (simp add: smallest_prime_beyond_aux_def smallest_prime_betwen_Some split: option.split)
   392 
   393 lemma [code]: "smallest_prime_beyond n = smallest_prime_beyond_aux 2 n"
   394   by (simp add: smallest_prime_beyond_aux_def)
   395 
   396 end