author haftmann
Sun, 09 Nov 2014 10:03:18 +0100
changeset 58954 18750e86d5b8
parent 58889 5b7a9633cfa8
child 60526 fad653acf58f
permissions -rw-r--r--
reverted 1ebf0a1f12a4 after successful re-tuning of simp rules for divisibility

(*  Title:      HOL/Number_Theory/Eratosthenes.thy
    Author:     Florian Haftmann, TU Muenchen

section {* The sieve of Eratosthenes *}

theory Eratosthenes
imports Main Primes

subsection {* Preliminary: strict divisibility *}

context dvd

abbreviation dvd_strict :: "'a \<Rightarrow> 'a \<Rightarrow> bool" (infixl "dvd'_strict" 50)
  "b dvd_strict a \<equiv> b dvd a \<and> \<not> a dvd b"


subsection {* Main corpus *}

text {* The sieve is modelled as a list of booleans, where @{const False} means \emph{marked out}. *}

type_synonym marks = "bool list"

definition numbers_of_marks :: "nat \<Rightarrow> marks \<Rightarrow> nat set"
  "numbers_of_marks n bs = fst ` {x \<in> set (enumerate n bs). snd x}"

lemma numbers_of_marks_simps [simp, code]:
  "numbers_of_marks n [] = {}"
  "numbers_of_marks n (True # bs) = insert n (numbers_of_marks (Suc n) bs)"
  "numbers_of_marks n (False # bs) = numbers_of_marks (Suc n) bs"
  by (auto simp add: numbers_of_marks_def intro!: image_eqI)

lemma numbers_of_marks_Suc:
  "numbers_of_marks (Suc n) bs = Suc ` numbers_of_marks n bs"
  by (auto simp add: numbers_of_marks_def enumerate_Suc_eq image_iff Bex_def)

lemma numbers_of_marks_replicate_False [simp]:
  "numbers_of_marks n (replicate m False) = {}"
  by (auto simp add: numbers_of_marks_def enumerate_replicate_eq)

lemma numbers_of_marks_replicate_True [simp]:
  "numbers_of_marks n (replicate m True) = {n..<n+m}"
  by (auto simp add: numbers_of_marks_def enumerate_replicate_eq image_def)

lemma in_numbers_of_marks_eq:
  "m \<in> numbers_of_marks n bs \<longleftrightarrow> m \<in> {n..<n + length bs} \<and> bs ! (m - n)"
  by (simp add: numbers_of_marks_def in_set_enumerate_eq image_iff add.commute)

lemma sorted_list_of_set_numbers_of_marks:
  "sorted_list_of_set (numbers_of_marks n bs) = map fst (filter snd (enumerate n bs))"
  by (auto simp add: numbers_of_marks_def distinct_map
    intro!: sorted_filter distinct_filter inj_onI sorted_distinct_set_unique)

text {* Marking out multiples in a sieve  *}
definition mark_out :: "nat \<Rightarrow> marks \<Rightarrow> marks"
  "mark_out n bs = map (\<lambda>(q, b). b \<and> \<not> Suc n dvd Suc (Suc q)) (enumerate n bs)"

lemma mark_out_Nil [simp]:
  "mark_out n [] = []"
  by (simp add: mark_out_def)
lemma length_mark_out [simp]:
  "length (mark_out n bs) = length bs"
  by (simp add: mark_out_def)

lemma numbers_of_marks_mark_out:
  "numbers_of_marks n (mark_out m bs) = {q \<in> numbers_of_marks n bs. \<not> Suc m dvd Suc q - n}"
  by (auto simp add: numbers_of_marks_def mark_out_def in_set_enumerate_eq image_iff
    nth_enumerate_eq less_eq_dvd_minus)

text {* Auxiliary operation for efficient implementation  *}

definition mark_out_aux :: "nat \<Rightarrow> nat \<Rightarrow> marks \<Rightarrow> marks"
  "mark_out_aux n m bs =
    map (\<lambda>(q, b). b \<and> (q < m + n \<or> \<not> Suc n dvd Suc (Suc q) + (n - m mod Suc n))) (enumerate n bs)"

lemma mark_out_code [code]:
  "mark_out n bs = mark_out_aux n n bs"
proof -
  { fix a
    assume A: "Suc n dvd Suc (Suc a)"
      and B: "a < n + n"
      and C: "n \<le> a"
    have False
    proof (cases "n = 0")
      case True with A B C show False by simp
      def m \<equiv> "Suc n" then have "m > 0" by simp
      case False then have "n > 0" by simp
      from A obtain q where q: "Suc (Suc a) = Suc n * q" by (rule dvdE)
      have "q > 0"
      proof (rule ccontr)
        assume "\<not> q > 0"
        with q show False by simp
      with `n > 0` have "Suc n * q \<ge> 2" by (auto simp add: gr0_conv_Suc)
      with q have a: "a = Suc n * q - 2" by simp
      with B have "q + n * q < n + n + 2"
        by auto
      then have "m * q < m * 2" by (simp add: m_def)
      with `m > 0` have "q < 2" by simp
      with `q > 0` have "q = 1" by simp
      with a have "a = n - 1" by simp
      with `n > 0` C show False by simp
  } note aux = this 
  show ?thesis
    by (auto simp add: mark_out_def mark_out_aux_def in_set_enumerate_eq intro: aux)

lemma mark_out_aux_simps [simp, code]:
  "mark_out_aux n m [] = []" (is ?thesis1)
  "mark_out_aux n 0 (b # bs) = False # mark_out_aux n n bs" (is ?thesis2)
  "mark_out_aux n (Suc m) (b # bs) = b # mark_out_aux n m bs" (is ?thesis3)
proof -
  show ?thesis1
    by (simp add: mark_out_aux_def)
  show ?thesis2
    by (auto simp add: mark_out_code [symmetric] mark_out_aux_def mark_out_def
      enumerate_Suc_eq in_set_enumerate_eq less_eq_dvd_minus)
  { def v \<equiv> "Suc m" and w \<equiv> "Suc n"
    fix q
    assume "m + n \<le> q"
    then obtain r where q: "q = m + n + r" by (auto simp add: le_iff_add)
    { fix u
      from w_def have "u mod w < w" by simp
      then have "u + (w - u mod w) = w + (u - u mod w)"
        by simp
      then have "u + (w - u mod w) = w + u div w * w"
        by (simp add: div_mod_equality' [symmetric])
    then have "w dvd v + w + r + (w - v mod w) \<longleftrightarrow> w dvd m + w + r + (w - m mod w)"
      by (simp add: add.assoc add.left_commute [of m] add.left_commute [of v]
        dvd_add_left_iff dvd_add_right_iff)
    moreover from q have "Suc q = m + w + r" by (simp add: w_def)
    moreover from q have "Suc (Suc q) = v + w + r" by (simp add: v_def w_def)
    ultimately have "w dvd Suc (Suc (q + (w - v mod w))) \<longleftrightarrow> w dvd Suc (q + (w - m mod w))"
      by (simp only: add_Suc [symmetric])
    then have "Suc n dvd Suc (Suc (Suc (q + n) - Suc m mod Suc n)) \<longleftrightarrow>
      Suc n dvd Suc (Suc (q + n - m mod Suc n))"
      by (simp add: v_def w_def Suc_diff_le trans_le_add2)
  then show ?thesis3
    by (auto simp add: mark_out_aux_def
      enumerate_Suc_eq in_set_enumerate_eq not_less)

text {* Main entry point to sieve *}

fun sieve :: "nat \<Rightarrow> marks \<Rightarrow> marks"
  "sieve n [] = []"
| "sieve n (False # bs) = False # sieve (Suc n) bs"
| "sieve n (True # bs) = True # sieve (Suc n) (mark_out n bs)"

text {*
  There are the following possible optimisations here:


    \item @{const sieve} can abort as soon as @{term n} is too big to let
      @{const mark_out} have any effect.

    \item Search for further primes can be given up as soon as the search
      position exceeds the square root of the maximum candidate.


  This is left as an constructive exercise to the reader.

lemma numbers_of_marks_sieve:
  "numbers_of_marks (Suc n) (sieve n bs) =
    {q \<in> numbers_of_marks (Suc n) bs. \<forall>m \<in> numbers_of_marks (Suc n) bs. \<not> m dvd_strict q}"
proof (induct n bs rule: sieve.induct)
  case 1 show ?case by simp
  case 2 then show ?case by simp
  case (3 n bs)
  have aux: "\<And>M n. n \<in> Suc ` M \<longleftrightarrow> n > 0 \<and> n - 1 \<in> M"
    fix M and n
    assume "n \<in> Suc ` M" then show "n > 0 \<and> n - 1 \<in> M" by auto
    fix M and n :: nat
    assume "n > 0 \<and> n - 1 \<in> M"
    then have "n > 0" and "n - 1 \<in> M" by auto
    then have "Suc (n - 1) \<in> Suc ` M" by blast
    with `n > 0` show "n \<in> Suc ` M" by simp
  { fix m :: nat
    assume "Suc (Suc n) \<le> m" and "m dvd Suc n"
    from `m dvd Suc n` obtain q where "Suc n = m * q" ..
    with `Suc (Suc n) \<le> m` have "Suc (m * q) \<le> m" by simp
    then have "m * q < m" by arith
    then have "q = 0" by simp
    with `Suc n = m * q` have False by simp
  } note aux1 = this
  { fix m q :: nat
    assume "\<forall>q>0. 1 < q \<longrightarrow> Suc n < q \<longrightarrow> q \<le> Suc (n + length bs)
      \<longrightarrow> bs ! (q - Suc (Suc n)) \<longrightarrow> \<not> Suc n dvd q \<longrightarrow> q dvd m \<longrightarrow> m dvd q"
    then have *: "\<And>q. Suc n < q \<Longrightarrow> q \<le> Suc (n + length bs)
      \<Longrightarrow> bs ! (q - Suc (Suc n)) \<Longrightarrow> \<not> Suc n dvd q \<Longrightarrow> q dvd m \<Longrightarrow> m dvd q"
      by auto
    assume "\<not> Suc n dvd m" and "q dvd m"
    then have "\<not> Suc n dvd q" by (auto elim: dvdE)
    moreover assume "Suc n < q" and "q \<le> Suc (n + length bs)"
      and "bs ! (q - Suc (Suc n))"
    moreover note `q dvd m`
    ultimately have "m dvd q" by (auto intro: *)
  } note aux2 = this
  from 3 show ?case
    apply (simp_all add: numbers_of_marks_mark_out numbers_of_marks_Suc Compr_image_eq inj_image_eq_iff
      in_numbers_of_marks_eq Ball_def imp_conjL aux)
    apply safe
    apply (simp_all add: less_diff_conv2 le_diff_conv2 dvd_minus_self not_less)
    apply (clarsimp dest!: aux1)
    apply (simp add: Suc_le_eq less_Suc_eq_le)
    apply (rule aux2) apply (clarsimp dest!: aux1)+

text {* Relation of the sieve algorithm to actual primes *}

definition primes_upto :: "nat \<Rightarrow> nat list"
  "primes_upto n = sorted_list_of_set {m. m \<le> n \<and> prime m}"

lemma set_primes_upto:
  "set (primes_upto n) = {m. m \<le> n \<and> prime m}"
  by (simp add: primes_upto_def)

lemma sorted_primes_upto [iff]:
  "sorted (primes_upto n)"
  by (simp add: primes_upto_def)

lemma distinct_primes_upto [iff]:
  "distinct (primes_upto n)"
  by (simp add: primes_upto_def)

lemma set_primes_upto_sieve:
  "set (primes_upto n) = numbers_of_marks 2 (sieve 1 (replicate (n - 1) True))"
proof (cases "n > 1")
  case False then have "n = 0 \<or> n = 1" by arith
  then show ?thesis
    by (auto simp add: numbers_of_marks_sieve numeral_2_eq_2 set_primes_upto dest: prime_gt_Suc_0_nat)
  { fix m q
    assume "Suc (Suc 0) \<le> q"
      and "q < Suc n"
      and "m dvd q"
    then have "m < Suc n" by (auto dest: dvd_imp_le)
    assume *: "\<forall>m\<in>{Suc (Suc 0)..<Suc n}. m dvd q \<longrightarrow> q dvd m"
      and "m dvd q" and "m \<noteq> 1"
    have "m = q" proof (cases "m = 0")
      case True with `m dvd q` show ?thesis by simp
      case False with `m \<noteq> 1` have "Suc (Suc 0) \<le> m" by arith
      with `m < Suc n` * `m dvd q` have "q dvd m" by simp
      with `m dvd q` show ?thesis by (simp add: dvd.eq_iff)
  then have aux: "\<And>m q. Suc (Suc 0) \<le> q \<Longrightarrow>
    q < Suc n \<Longrightarrow>
    m dvd q \<Longrightarrow>
    \<forall>m\<in>{Suc (Suc 0)..<Suc n}. m dvd q \<longrightarrow> q dvd m \<Longrightarrow>
    m dvd q \<Longrightarrow> m \<noteq> q \<Longrightarrow> m = 1" by auto
  case True then show ?thesis
    apply (auto simp add: One_nat_def numbers_of_marks_sieve numeral_2_eq_2 set_primes_upto
        dest: prime_gt_Suc_0_nat)
    apply (metis One_nat_def Suc_le_eq less_not_refl prime_nat_def)
    apply (metis One_nat_def Suc_le_eq aux prime_nat_def)

lemma primes_upto_sieve [code]:
  "primes_upto n = map fst (filter snd (enumerate 2 (sieve 1 (replicate (n - 1) True))))"
proof -
  have "primes_upto n = sorted_list_of_set (numbers_of_marks 2 (sieve 1 (replicate (n - 1) True)))"
    apply (rule sorted_distinct_set_unique)
    apply (simp_all only: set_primes_upto_sieve numbers_of_marks_def)
    apply auto
  then show ?thesis by (simp add: sorted_list_of_set_numbers_of_marks)

lemma prime_in_primes_upto:
  "prime n \<longleftrightarrow> n \<in> set (primes_upto n)"
  by (simp add: set_primes_upto)

subsection {* Application: smallest prime beyond a certain number *}

definition smallest_prime_beyond :: "nat \<Rightarrow> nat"
  "smallest_prime_beyond n = (LEAST p. prime p \<and> p \<ge> n)"

  prime_smallest_prime_beyond [iff]: "prime (smallest_prime_beyond n)" (is ?P)
  and smallest_prime_beyond_le [iff]: "smallest_prime_beyond n \<ge> n" (is ?Q)
proof -
  let ?least = "LEAST p. prime p \<and> p \<ge> n"
  from primes_infinite obtain q where "prime q \<and> q \<ge> n"
    by (metis finite_nat_set_iff_bounded_le mem_Collect_eq nat_le_linear)
  then have "prime ?least \<and> ?least \<ge> n" by (rule LeastI)
  then show ?P and ?Q by (simp_all add: smallest_prime_beyond_def)

lemma smallest_prime_beyond_smallest:
  "prime p \<Longrightarrow> p \<ge> n \<Longrightarrow> smallest_prime_beyond n \<le> p"
  by (simp only: smallest_prime_beyond_def) (auto intro: Least_le)

lemma smallest_prime_beyond_eq:
  "prime p \<Longrightarrow> p \<ge> n \<Longrightarrow> (\<And>q. prime q \<Longrightarrow> q \<ge> n \<Longrightarrow> q \<ge> p) \<Longrightarrow> smallest_prime_beyond n = p"
  by (simp only: smallest_prime_beyond_def) (auto intro: Least_equality)

definition smallest_prime_between :: "nat \<Rightarrow> nat \<Rightarrow> nat option"
  "smallest_prime_between m n =
    (if (\<exists>p. prime p \<and> m \<le> p \<and> p \<le> n) then Some (smallest_prime_beyond m) else None)"

lemma smallest_prime_between_None:
  "smallest_prime_between m n = None \<longleftrightarrow> (\<forall>q. m \<le> q \<and> q \<le> n \<longrightarrow> \<not> prime q)"
  by (auto simp add: smallest_prime_between_def)

lemma smallest_prime_betwen_Some:
  "smallest_prime_between m n = Some p \<longleftrightarrow> smallest_prime_beyond m = p \<and> p \<le> n"
  by (auto simp add: smallest_prime_between_def dest: smallest_prime_beyond_smallest [of _ m])

lemma [code]:
  "smallest_prime_between m n = List.find (\<lambda>p. p \<ge> m) (primes_upto n)"
proof -
  { fix p
    def A \<equiv> "{p. p \<le> n \<and> prime p \<and> m \<le> p}"
    assume assms: "m \<le> p" "prime p" "p \<le> n"
    then have "smallest_prime_beyond m \<le> p" by (auto intro: smallest_prime_beyond_smallest)
    from this `p \<le> n` have *: "smallest_prime_beyond m \<le> n" by (rule order_trans)
    from assms have ex: "\<exists>p\<le>n. prime p \<and> m \<le> p" by auto
    then have "finite A" by (auto simp add: A_def)
    with * have "Min A = smallest_prime_beyond m"
      by (auto simp add: A_def intro: Min_eqI smallest_prime_beyond_smallest)
    with ex sorted_primes_upto have "List.find (\<lambda>p. p \<ge> m) (primes_upto n) = Some (smallest_prime_beyond m)"
      by (auto simp add: set_primes_upto sorted_find_Min A_def)
  } then show ?thesis  
  by (auto simp add: smallest_prime_between_def find_None_iff set_primes_upto intro!: sym [of _ None])

definition smallest_prime_beyond_aux :: "nat \<Rightarrow> nat \<Rightarrow> nat"
  "smallest_prime_beyond_aux k n = smallest_prime_beyond n"

lemma [code]:
  "smallest_prime_beyond_aux k n =
    (case smallest_prime_between n (k * n)
     of Some p \<Rightarrow> p
      | None \<Rightarrow> smallest_prime_beyond_aux (Suc k) n)"
  by (simp add: smallest_prime_beyond_aux_def smallest_prime_betwen_Some split: option.split)

lemma [code]:
  "smallest_prime_beyond n = smallest_prime_beyond_aux 2 n"
  by (simp add: smallest_prime_beyond_aux_def)