Theory Interval_Tree

(* Author: Bohua Zhan, with modifications by Tobias Nipkow *)

section ‹Interval Trees›

theory Interval_Tree
imports
  "HOL-Data_Structures.Cmp"
  "HOL-Data_Structures.List_Ins_Del"
  "HOL-Data_Structures.Isin2"
  "HOL-Data_Structures.Set_Specs"
begin

subsection ‹Intervals›

text ‹The following definition of intervals uses the ❙‹typedef› command
to define the type of non-empty intervals as a subset of the type of pairs ‹p›
where @{prop "fst p ≤ snd p"}:›

typedef (overloaded) 'a::linorder ivl =
  "{p :: 'a × 'a. fst p ≤ snd p}" by auto

text ‹More precisely, @{typ "'a ivl"} is isomorphic with that subset via the function
@{const Rep_ivl}. Hence the basic interval properties are not immediate but
need simple proofs:›

definition low :: "'a::linorder ivl ⇒ 'a" where
"low p = fst (Rep_ivl p)"

definition high :: "'a::linorder ivl ⇒ 'a" where
"high p = snd (Rep_ivl p)"

lemma ivl_is_interval: "low p ≤ high p"
by (metis Rep_ivl high_def low_def mem_Collect_eq)

lemma ivl_inj: "low p = low q ⟹ high p = high q ⟹ p = q"
by (metis Rep_ivl_inverse high_def low_def prod_eqI)

text ‹Now we can forget how exactly intervals were defined.›


instantiation ivl :: (linorder) linorder begin

definition ivl_less: "(x < y) = (low x < low y | (low x = low y ∧ high x < high y))"
definition ivl_less_eq: "(x ≤ y) = (low x < low y | (low x = low y ∧ high x ≤ high y))"

instance proof
  fix x y z :: "'a ivl"
  show a: "(x < y) = (x ≤ y ∧ ¬ y ≤ x)"
    using ivl_less ivl_less_eq by force
  show b: "x ≤ x"
    by (simp add: ivl_less_eq)
  show c: "x ≤ y ⟹ y ≤ z ⟹ x ≤ z"
    using ivl_less_eq by fastforce
  show d: "x ≤ y ⟹ y ≤ x ⟹ x = y"
    using ivl_less_eq a ivl_inj ivl_less by fastforce
  show e: "x ≤ y ∨ y ≤ x"
    by (meson ivl_less_eq leI not_less_iff_gr_or_eq)
qed end


definition overlap :: "('a::linorder) ivl ⇒ 'a ivl ⇒ bool" where
"overlap x y ⟷ (high x ≥ low y ∧ high y ≥ low x)"

definition has_overlap :: "('a::linorder) ivl set ⇒ 'a ivl ⇒ bool" where
"has_overlap S y ⟷ (∃x∈S. overlap x y)"


subsection ‹Interval Trees›

type_synonym 'a ivl_tree = "('a ivl * 'a) tree"

fun max_hi :: "('a::order_bot) ivl_tree ⇒ 'a" where
"max_hi Leaf = bot" |
"max_hi (Node _ (_,m) _) = m"

definition max3 :: "('a::{linorder,order_bot}) ivl ⇒ 'a ivl_tree ⇒ 'a ivl_tree ⇒ 'a" where
"max3 a l r = max (high a) (max (max_hi l) (max_hi r))"

fun inv_max_hi :: "('a::{linorder,order_bot}) ivl_tree ⇒ bool" where
"inv_max_hi Leaf ⟷ True" |
"inv_max_hi (Node l (a, m) r) ⟷ (m = max3 a l r ∧ inv_max_hi l ∧ inv_max_hi r)"

lemma max_hi_is_max:
  "inv_max_hi t ⟹ a ∈ set_tree t ⟹ high a ≤ max_hi t"
by (induct t, auto simp add: max3_def max_def)

lemma max_hi_exists:
  "inv_max_hi t ⟹ t ≠ Leaf ⟹ ∃a∈set_tree t. high a = max_hi t"
proof (induction t rule: tree2_induct)
  case Leaf
  then show ?case by auto
next
  case N: (Node l v m r)
  then show ?case
  proof (cases l rule: tree2_cases)
    case Leaf
    then show ?thesis
      using N.prems(1) N.IH(2) by (cases r, auto simp add: max3_def max_def le_bot)
  next
    case Nl: Node
    then show ?thesis
    proof (cases r rule: tree2_cases)
      case Leaf
      then show ?thesis
        using N.prems(1) N.IH(1) Nl by (auto simp add: max3_def max_def le_bot)
    next
      case Nr: Node
      obtain p1 where p1: "p1 ∈ set_tree l" "high p1 = max_hi l"
        using N.IH(1) N.prems(1) Nl by auto
      obtain p2 where p2: "p2 ∈ set_tree r" "high p2 = max_hi r"
        using N.IH(2) N.prems(1) Nr by auto
      then show ?thesis
        using p1 p2 N.prems(1) by (auto simp add: max3_def max_def)
    qed
  qed
qed


subsection ‹Insertion and Deletion›

definition node where
[simp]: "node l a r = Node l (a, max3 a l r) r"

fun insert :: "'a::{linorder,order_bot} ivl ⇒ 'a ivl_tree ⇒ 'a ivl_tree" where
"insert x Leaf = Node Leaf (x, high x) Leaf" |
"insert x (Node l (a, m) r) =
  (case cmp x a of
     EQ ⇒ Node l (a, m) r |
     LT ⇒ node (insert x l) a r |
     GT ⇒ node l a (insert x r))"

lemma inorder_insert:
  "sorted (inorder t) ⟹ inorder (insert x t) = ins_list x (inorder t)"
by (induct t rule: tree2_induct) (auto simp: ins_list_simps)

lemma inv_max_hi_insert:
  "inv_max_hi t ⟹ inv_max_hi (insert x t)"
by (induct t rule: tree2_induct) (auto simp add: max3_def)

fun split_min :: "'a::{linorder,order_bot} ivl_tree ⇒ 'a ivl × 'a ivl_tree" where
"split_min (Node l (a, m) r) =
  (if l = Leaf then (a, r)
   else let (x,l') = split_min l in (x, node l' a r))"

fun delete :: "'a::{linorder,order_bot} ivl ⇒ 'a ivl_tree ⇒ 'a ivl_tree" where
"delete x Leaf = Leaf" |
"delete x (Node l (a, m) r) =
  (case cmp x a of
     LT ⇒ node (delete x l) a r |
     GT ⇒ node l a (delete x r) |
     EQ ⇒ if r = Leaf then l else
           let (a', r') = split_min r in node l a' r')"

lemma split_minD:
  "split_min t = (x,t') ⟹ t ≠ Leaf ⟹ x # inorder t' = inorder t"
by (induct t arbitrary: t' rule: split_min.induct)
   (auto simp: sorted_lems split: prod.splits if_splits)

lemma inorder_delete:
  "sorted (inorder t) ⟹ inorder (delete x t) = del_list x (inorder t)"
by (induct t)
   (auto simp: del_list_simps split_minD Let_def split: prod.splits)

lemma inv_max_hi_split_min:
  "⟦ t ≠ Leaf;  inv_max_hi t ⟧ ⟹ inv_max_hi (snd (split_min t))"
by (induct t) (auto split: prod.splits)

lemma inv_max_hi_delete:
  "inv_max_hi t ⟹ inv_max_hi (delete x t)"
apply (induct t)
 apply simp
using inv_max_hi_split_min by (fastforce simp add: Let_def split: prod.splits)


subsection ‹Search›

text ‹Does interval ‹x› overlap with any interval in the tree?›

fun search :: "'a::{linorder,order_bot} ivl_tree ⇒ 'a ivl ⇒ bool" where
"search Leaf x = False" |
"search (Node l (a, m) r) x =
  (if overlap x a then True
   else if l ≠ Leaf ∧ max_hi l ≥ low x then search l x
   else search r x)"

lemma search_correct:
  "inv_max_hi t ⟹ sorted (inorder t) ⟹ search t x = has_overlap (set_tree t) x"
proof (induction t rule: tree2_induct)
  case Leaf
  then show ?case by (auto simp add: has_overlap_def)
next
  case (Node l a m r)
  have search_l: "search l x = has_overlap (set_tree l) x"
    using Node.IH(1) Node.prems by (auto simp: sorted_wrt_append)
  have search_r: "search r x = has_overlap (set_tree r) x"
    using Node.IH(2) Node.prems by (auto simp: sorted_wrt_append)
  show ?case
  proof (cases "overlap a x")
    case True
    thus ?thesis by (auto simp: overlap_def has_overlap_def)
  next
    case a_disjoint: False
    then show ?thesis
    proof cases
      assume [simp]: "l = Leaf"
      have search_eval: "search (Node l (a, m) r) x = search r x"
        using a_disjoint overlap_def by auto
      show ?thesis
        unfolding search_eval search_r
        by (auto simp add: has_overlap_def a_disjoint)
    next
      assume "l ≠ Leaf"
      then show ?thesis
      proof (cases "max_hi l ≥ low x")
        case max_hi_l_ge: True
        have "inv_max_hi l"
          using Node.prems(1) by auto
        then obtain p where p: "p ∈ set_tree l" "high p = max_hi l"
          using ‹l ≠ Leaf› max_hi_exists by auto
        have search_eval: "search (Node l (a, m) r) x = search l x"
          using a_disjoint ‹l ≠ Leaf› max_hi_l_ge by (auto simp: overlap_def)
        show ?thesis
        proof (cases "low p ≤ high x")
          case True
          have "overlap p x"
            unfolding overlap_def using True p(2) max_hi_l_ge by auto
          then show ?thesis
            unfolding search_eval search_l
            using p(1) by(auto simp: has_overlap_def overlap_def)
        next
          case False
          have "¬overlap x rp" if asm: "rp ∈ set_tree r" for rp
          proof -
            have "low p ≤ low rp"
              using asm p(1) Node(4) by(fastforce simp: sorted_wrt_append ivl_less)
            then show ?thesis
              using False by (auto simp: overlap_def)
          qed
          then show ?thesis
            unfolding search_eval search_l
            using a_disjoint by (auto simp: has_overlap_def overlap_def)
        qed
      next
        case False
        have search_eval: "search (Node l (a, m) r) x = search r x"
          using a_disjoint False by (auto simp: overlap_def)
        have "¬overlap x lp" if asm: "lp ∈ set_tree l" for lp
          using asm False Node.prems(1) max_hi_is_max
          by (fastforce simp: overlap_def)
        then show ?thesis
          unfolding search_eval search_r
          using a_disjoint by (auto simp: has_overlap_def overlap_def)
      qed
    qed
  qed
qed

definition empty :: "'a ivl_tree" where
"empty = Leaf"


subsection ‹Specification›

locale Interval_Set = Set +
  fixes has_overlap :: "'t ⇒ 'a::linorder ivl ⇒ bool"
  assumes set_overlap: "invar s ⟹ has_overlap s x = Interval_Tree.has_overlap (set s) x"

fun invar :: "('a::{linorder,order_bot}) ivl_tree ⇒ bool" where
"invar t = (inv_max_hi t ∧ sorted(inorder t))"

interpretation S: Interval_Set
  where empty = Leaf and insert = insert and delete = delete
  and has_overlap = search and isin = isin and set = set_tree
  and invar = invar
proof (standard, goal_cases)
  case 1
  then show ?case by auto
next
  case 2
  then show ?case by (simp add: isin_set_inorder)
next
  case 3
  then show ?case by(simp add: inorder_insert set_ins_list flip: set_inorder)
next
  case 4
  then show ?case by(simp add: inorder_delete set_del_list flip: set_inorder)
next
  case 5
  then show ?case by auto
next
  case 6
  then show ?case by (simp add: inorder_insert inv_max_hi_insert sorted_ins_list)
next
  case 7
  then show ?case by (simp add: inorder_delete inv_max_hi_delete sorted_del_list)
next
  case 8
  then show ?case by (simp add: search_correct)
qed

end