(*
  File:       src/HOL/Library/Omega_Words_Fun.thy
  Changeset:  65151 a7394aa4d21c, author wenzelm, Wed, 08 Mar 2017 10:50:59 +0100
              (parent 64593 50c715579715, child 67443 3abf6a722518, permissions -rw-r--r--)
  Log:        tuned proofs;
*)

(*
Author:     Stefan Merz
Author:     Salomon Sickert
Author:     Julian Brunner
Author:     Peter Lammich
*)

section \<open>$\omega$-words\<close>

theory Omega_Words_Fun

imports Infinite_Set
begin

text \<open>Note: This theory is based on Stefan Merz's work.\<close>

text \<open>
Automata recognize languages, which are sets of words. For the
theory of $\omega$-automata, we are mostly interested in
$\omega$-words, but it is sometimes useful to reason about
finite words, too. We are modeling finite words as lists; this
lets us benefit from the existing library. Other formalizations
could be investigated, such as representing words as functions
whose domains are initial intervals of the natural numbers.
\<close>

subsection \<open>Type declaration and elementary operations\<close>

text \<open>
We represent $\omega$-words as functions from the natural numbers
to the alphabet type. Other possible formalizations include
a coinductive definition or a uniform encoding of finite and
infinite words, as studied by M\"uller et al.
\<close>

(* An omega-word over alphabet 'a is a function from positions (naturals) to letters. *)
type_synonym
'a word = "nat \<Rightarrow> 'a"

text \<open>
We can prefix a finite word to an $\omega$-word, and a way
to obtain an $\omega$-word from a finite, non-empty word is by
$\omega$-iteration.
\<close>

(* Concatenation of a finite word w in front of an omega-word x:
   positions below length w read from w, all later positions read
   from x at the position shifted down by length w. *)
definition
conc :: "['a list, 'a word] \<Rightarrow> 'a word"  (infixr "\<frown>" 65)
where "w \<frown> x == \<lambda>n. if n < length w then w!n else x (n - length w)"

(* Omega-iteration of a finite word: position n holds the letter of w at
   index (n mod length w).  The result is left undefined for the empty word.
   The argument priority [1000] makes the postfix notation bind tightly, so
   that terms used later in this theory (an iterated word applied to a
   position, or passed to another function) parse with the iteration as the
   innermost construct. *)
definition
  iter :: "'a list \<Rightarrow> 'a word"  ("(_\<^sup>\<omega>)" [1000])
  where "iter w == if w = [] then undefined else (\<lambda>n. w!(n mod (length w)))"

(* Prefixing the empty list leaves an omega-word unchanged. *)
lemma conc_empty[simp]: "[] \<frown> w = w"
  by (simp add: conc_def)

(* Below the length of w, the concatenation reads from the finite word w. *)
lemma conc_fst[simp]: "n < length w \<Longrightarrow> (w \<frown> x) n = w!n"
  by (simp add: conc_def)

(* From position length w onward, the concatenation reads from x,
   shifted down by length w. *)
lemma conc_snd[simp]: "\<not>(n < length w) \<Longrightarrow> (w \<frown> x) n = x (n - length w)"
  by (simp add: conc_def)

(* Pointwise description of the iteration of a non-empty word. *)
lemma iter_nth [simp]: "0 < length w \<Longrightarrow> w\<^sup>\<omega> n = w!(n mod (length w))"
  by (simp add: iter_def)

(* Prefixing v and then u equals prefixing the list u @ v.
   Proved pointwise by a three-way case split on the position n:
   inside u, inside v, or beyond both. *)
lemma conc_conc[simp]: "u \<frown> v \<frown> w = (u @ v) \<frown> w" (is "?lhs = ?rhs")
proof
  fix n
  have u: "n < length u \<Longrightarrow> ?lhs n = ?rhs n"
    by (simp add: conc_def nth_append)
  have v: "\<lbrakk> \<not>(n < length u); n < length u + length v \<rbrakk> \<Longrightarrow> ?lhs n = ?rhs n"
    by (simp add: conc_def nth_append, arith)
  have w: "\<not>(n < length u + length v) \<Longrightarrow> ?lhs n = ?rhs n"
    by (simp add: conc_def nth_append, arith)
  from u v w show "?lhs n = ?rhs n" by blast
qed

(* The letters occurring in a concatenation are the elements of the finite
   prefix together with the range of the infinite part.  Both inclusions
   are shown separately; for the second one an explicit witness position
   is given in each case. *)
lemma range_conc[simp]: "range (w\<^sub>1 \<frown> w\<^sub>2) = set w\<^sub>1 \<union> range w\<^sub>2"
proof (intro equalityI subsetI)
fix a
assume "a \<in> range (w\<^sub>1 \<frown> w\<^sub>2)"
then obtain i where 1: "a = (w\<^sub>1 \<frown> w\<^sub>2) i" by auto
then show "a \<in> set w\<^sub>1 \<union> range w\<^sub>2"
unfolding 1 by (cases "i < length w\<^sub>1") simp_all
next
fix a
assume a: "a \<in> set w\<^sub>1 \<union> range w\<^sub>2"
then show "a \<in> range (w\<^sub>1 \<frown> w\<^sub>2)"
proof
assume "a \<in> set w\<^sub>1"
then obtain i where 1: "i < length w\<^sub>1" "a = w\<^sub>1 ! i"
using in_set_conv_nth by metis
show ?thesis
proof
show "a = (w\<^sub>1 \<frown> w\<^sub>2) i" using 1 by auto
show "i \<in> UNIV" by rule
qed
next
assume "a \<in> range w\<^sub>2"
then obtain i where 1: "a = w\<^sub>2 i" by auto
show ?thesis
proof
show "a = (w\<^sub>1 \<frown> w\<^sub>2) (length w\<^sub>1 + i)" using 1 by simp
show "length w\<^sub>1 + i \<in> UNIV" by rule
qed
qed
qed

(* Unrolling: the iteration of a non-empty word w equals w followed by the
   iteration of w.  Not declared [simp]. *)
lemma iter_unroll: "0 < length w \<Longrightarrow> w\<^sup>\<omega> = w \<frown> w\<^sup>\<omega>"
by (rule ext) (simp add: conc_def mod_geq)

subsection \<open>Subsequence, Prefix, and Suffix\<close>

(* suffix k x is the omega-word x with its first k positions dropped. *)
definition suffix :: "[nat, 'a word] \<Rightarrow> 'a word"
where "suffix k x \<equiv> \<lambda>n. x (k+n)"

(* subsequence w i j is the finite list of letters of w at positions
   i, ..., j - 1 (the half-open interval [i..<j]). *)
definition subsequence :: "'a word \<Rightarrow> nat \<Rightarrow> nat \<Rightarrow> 'a list"  ("_ [_ \<rightarrow> _]" 900)
where "subsequence w i j \<equiv> map w [i..<j]"

(* prefix n w abbreviates the subsequence of w starting at position 0
   and ending before position n. *)
abbreviation prefix :: "nat \<Rightarrow> 'a word \<Rightarrow> 'a list"
where "prefix n w \<equiv> subsequence w 0 n"

(* Pointwise description of suffix. *)
lemma suffix_nth [simp]: "(suffix k x) n = x (k+n)"
  by (simp add: suffix_def)

(* Dropping zero positions is the identity. *)
lemma suffix_0 [simp]: "suffix 0 x = x"
  by (simp add: suffix_def)

(* Nested suffixes add their offsets. *)
lemma suffix_suffix [simp]: "suffix m (suffix k x) = suffix (k+m) x"
  by (rule ext) (simp add: suffix_def add.assoc)

(* A prefix of length i + j splits into the prefix of length i followed by
   the subsequence from i up to i + j.  Not declared [simp]. *)
lemma subsequence_append: "prefix (i + j) w = prefix i w @ (w [i \<rightarrow> i + j])"
unfolding map_append[symmetric] upt_add_eq_append[OF le0] subsequence_def ..

(* Dropping i elements from a subsequence moves its start index up by i. *)
lemma subsequence_drop[simp]: "drop i (w [j \<rightarrow> k]) = w [j + i \<rightarrow> k]"
  by (simp add: subsequence_def drop_map)

(* A subsequence is empty exactly when its end index does not exceed its start. *)
lemma subsequence_empty[simp]: "w [i \<rightarrow> j] = [] \<longleftrightarrow> j \<le> i"
  by (auto simp add: subsequence_def)

(* The length of a subsequence is the (truncated) difference of its bounds. *)
lemma subsequence_length[simp]: "length (subsequence w i j) = j - i"
  by (simp add: subsequence_def)

(* Indexing into a subsequence reads the word at the start index plus offset. *)
lemma subsequence_nth[simp]: "k < j - i \<Longrightarrow> (w [i \<rightarrow> j]) ! k = w (i + k)"
  by (auto simp add: subsequence_def)

(* A subsequence ending before position 0 is empty. *)
lemma subseq_to_zero[simp]: "w[i\<rightarrow>0] = []"
by simp

(* A subsequence whose end index does not exceed its start index is empty. *)
lemma subseq_to_smaller[simp]: "i\<ge>j \<Longrightarrow> w[i\<rightarrow>j] = []"
by simp

(* Extending the end index by one appends the letter at the old end index. *)
lemma subseq_to_Suc[simp]: "i\<le>j \<Longrightarrow> w [i \<rightarrow> Suc j] = w [ i \<rightarrow> j ] @ [w j]"
by (auto simp: subsequence_def)

(* A subsequence covering exactly position i is the singleton list [w i]. *)
lemma subsequence_singleton[simp]: "w [i \<rightarrow> Suc i] = [w i]"
by (auto simp: subsequence_def)

(* A subsequence from i to j is the prefix of length j - i of the suffix
   starting at i.  For i <= j the interval [i..<j] is rewritten as the
   interval [0..<j - i] shifted by i; otherwise both sides are empty. *)
lemma subsequence_prefix_suffix: "prefix (j - i) (suffix i w) = w [i \<rightarrow> j]"
proof (cases "i \<le> j")
  case True
  have "w [i \<rightarrow> j] = map w (map (\<lambda>n. n + i) [0..<j - i])"
    unfolding map_add_upt subsequence_def
    using le_add_diff_inverse2[OF True] by force
  also
  have "\<dots> = map (\<lambda>n. w (n + i)) [0..<j - i]"
    unfolding map_map comp_def by blast
  finally
  show ?thesis
    unfolding subsequence_def suffix_def add.commute[of i] by simp
next
  case False
  then show ?thesis
    by (simp add: subsequence_def)
qed

(* Every omega-word is its prefix of length n followed by its suffix from n. *)
lemma prefix_suffix: "x = prefix n x \<frown> (suffix n x)"
by (rule ext) (simp add: subsequence_def conc_def)

(* The symmetric orientation (recombining prefix and suffix into the word)
   is the one registered as a simp rule. *)
declare prefix_suffix[symmetric, simp]

(* Elimination rule: any omega-word v can be split into a finite part of
   any requested length k and an infinite remainder.  The witnesses are the
   prefix of length k and the suffix from k. *)
lemma word_split: obtains v\<^sub>1 v\<^sub>2 where "v = v\<^sub>1 \<frown> v\<^sub>2" "length v\<^sub>1 = k"
proof
show "v = prefix k v \<frown> suffix k v"
by (rule prefix_suffix)
show "length (prefix k v) = k"
by simp
qed

(* The letters of a subsequence are the image of the index interval under
   the word (image operator written with a backtick). *)
lemma set_subsequence[simp]: "set (w[i\<rightarrow>j]) = w`{i..<j}"
  unfolding subsequence_def by auto

(* Taking i elements from a subsequence caps its end index at j + i. *)
lemma subsequence_take[simp]: "take i (w [j \<rightarrow> k]) = w [j \<rightarrow> min (j + i) k]"
by (simp add: subsequence_def take_map min_def)

(* A subsequence of a suffix is a subsequence of the original word with
   both bounds shifted by the suffix offset. *)
lemma subsequence_shift[simp]: "(suffix i w) [j \<rightarrow> k] = w [i + j \<rightarrow> i + k]"
  by (metis add_diff_cancel_left subsequence_prefix_suffix suffix_suffix)

(* The subsequence from i to j followed by the suffix from j is the suffix
   from i, provided i <= j. *)
lemma suffix_subseq_join[simp]: "i \<le> j \<Longrightarrow> v [i \<rightarrow> j] \<frown> suffix j v = suffix i v"
  by (metis (no_types, lifting) Nat.add_0_right le_add_diff_inverse prefix_suffix
    subsequence_shift suffix_suffix)

(* A prefix of a concatenation that is no longer than the finite part w is
   the corresponding initial segment of w.  Proved by comparing the two
   lists element by element. *)
lemma prefix_conc_fst[simp]:
assumes "j \<le> length w"
shows "prefix j (w \<frown> w') = take j w"
proof -
have "\<forall>i < j. (prefix j (w \<frown> w')) ! i = (take j w) ! i"
using assms by (simp add: conc_fst subsequence_def)
thus ?thesis
by (simp add: assms list_eq_iff_nth_eq min.absorb2)
qed

(* A prefix of a concatenation that reaches at least to the end of the
   finite part u is all of u followed by a prefix of v of the remaining
   length.  Proved via list extensionality: equal lengths, equal elements. *)
lemma prefix_conc_snd[simp]:
assumes "n \<ge> length u"
shows "prefix n (u \<frown> v) = u @ prefix (n - length u) v"
proof (intro nth_equalityI allI impI)
show "length (prefix n (u \<frown> v)) = length (u @ prefix (n - length u) v)"
using assms by simp
fix i
assume "i < length (prefix n (u \<frown> v))"
then show "prefix n (u \<frown> v) ! i = (u @ prefix (n - length u) v) ! i"
by (cases "i < length u") (auto simp: nth_append)
qed

(* The prefix of a concatenation of exactly the finite part's length is
   that finite part. *)
lemma prefix_conc_length[simp]: "prefix (length w) (w \<frown> w') = w"
by simp

(* A suffix of a concatenation starting inside the finite part u drops the
   first n letters of u and keeps v. *)
lemma suffix_conc_fst[simp]:
assumes "n \<le> length u"
shows "suffix n (u \<frown> v) = drop n u \<frown> v"
proof
show "suffix n (u \<frown> v) i = (drop n u \<frown> v) i" for i
using assms by (cases "n + i < length u") (auto simp: algebra_simps)
qed

(* A suffix of a concatenation starting at or after the end of the finite
   part u is a suffix of v, with the offset reduced by length u. *)
lemma suffix_conc_snd[simp]:
assumes "n \<ge> length u"
shows "suffix n (u \<frown> v) = suffix (n - length u) v"
proof
show "suffix n (u \<frown> v) i = suffix (n - length u) v i" for i
using assms by simp
qed

(* Dropping exactly the finite part of a concatenation yields the infinite part. *)
lemma suffix_conc_length[simp]: "suffix (length w) (w \<frown> w') = w'"
unfolding conc_def by force

(* Two concatenations whose finite parts have equal length are equal iff
   the finite parts agree and the infinite parts agree.  The forward
   direction instantiates the pointwise equality at positions inside the
   finite part and at positions shifted past it. *)
lemma concat_eq[iff]:
assumes "length v\<^sub>1 = length v\<^sub>2"
shows "v\<^sub>1 \<frown> u\<^sub>1 = v\<^sub>2 \<frown> u\<^sub>2 \<longleftrightarrow> v\<^sub>1 = v\<^sub>2 \<and> u\<^sub>1 = u\<^sub>2"
(is "?lhs \<longleftrightarrow> ?rhs")
proof
assume ?lhs
then have 1: "(v\<^sub>1 \<frown> u\<^sub>1) i = (v\<^sub>2 \<frown> u\<^sub>2) i" for i by auto
show ?rhs
proof (intro conjI ext nth_equalityI allI impI)
show "length v\<^sub>1 = length v\<^sub>2" by (rule assms(1))
next
fix i
assume 2: "i < length v\<^sub>1"
have 3: "i < length v\<^sub>2" using assms(1) 2 by simp
show "v\<^sub>1 ! i = v\<^sub>2 ! i" using 1[of i] 2 3 by simp
next
show "u\<^sub>1 i = u\<^sub>2 i" for i
using 1[of "length v\<^sub>1 + i"] assms(1) by simp
qed
next
assume ?rhs
then show ?lhs by simp
qed

(* Concatenation with a fixed finite part is injective in the infinite part. *)
lemma same_concat_eq[iff]: "u \<frown> v = u \<frown> w \<longleftrightarrow> v = w"
by simp

(* Composing a function with a concatenation maps the function over the
   finite part and composes it with the infinite part. *)
lemma comp_concat[simp]: "f \<circ> u \<frown> v = map f u \<frown> (f \<circ> v)"
proof
fix i
show "(f \<circ> u \<frown> v) i = (map f u \<frown> (f \<circ> v)) i"
by (cases "i < length u") simp_all
qed

subsection \<open>Prepending\<close>

(* a ## w prepends the single letter a to the omega-word w: position 0
   holds a, and position Suc i holds w i. *)
primrec build :: "'a \<Rightarrow> 'a word \<Rightarrow> 'a word"  (infixr "##" 65)
where "(a ## w) 0 = a" | "(a ## w) (Suc i) = w i"

(* Prepending is injective in both arguments.  The forward direction
   instantiates the pointwise equality at position 0 and at successor
   positions. *)
lemma build_eq[iff]: "a\<^sub>1 ## w\<^sub>1 = a\<^sub>2 ## w\<^sub>2 \<longleftrightarrow> a\<^sub>1 = a\<^sub>2 \<and> w\<^sub>1 = w\<^sub>2"
proof
assume 1: "a\<^sub>1 ## w\<^sub>1 = a\<^sub>2 ## w\<^sub>2"
have 2: "(a\<^sub>1 ## w\<^sub>1) i = (a\<^sub>2 ## w\<^sub>2) i" for i
using 1 by auto
show "a\<^sub>1 = a\<^sub>2 \<and> w\<^sub>1 = w\<^sub>2"
proof (intro conjI ext)
show "a\<^sub>1 = a\<^sub>2"
using 2[of "0"] by simp
show "w\<^sub>1 i = w\<^sub>2 i" for i
using 2[of "Suc i"] by simp
qed
next
assume 1: "a\<^sub>1 = a\<^sub>2 \<and> w\<^sub>1 = w\<^sub>2"
show "a\<^sub>1 ## w\<^sub>1 = a\<^sub>2 ## w\<^sub>2" using 1 by simp
qed

(* Concatenating a list with head a equals prepending a to the
   concatenation of the tail.  Proved pointwise by cases on the position. *)
lemma build_cons[simp]: "(a # u) \<frown> v = a ## u \<frown> v"
proof
fix i
show "((a # u) \<frown> v) i = (a ## u \<frown> v) i"
proof (cases i)
case 0
show ?thesis unfolding 0 by simp
next
case (Suc j)
show ?thesis unfolding Suc by (cases "j < length u", simp+)
qed
qed

(* Concatenating an appended list splits at the letter a into a
   concatenation of w and a prepending of a. *)
lemma build_append[simp]: "(w @ a # u) \<frown> v = w \<frown> a ## u \<frown> v"
unfolding conc_conc[symmetric] by simp

(* Prepending the first letter of w to the suffix of w from position 1
   gives back w. *)
lemma build_first[simp]: "w 0 ## suffix (Suc 0) w = w"
proof
show "(w 0 ## suffix (Suc 0) w) i = w i" for i
by (cases i) simp_all
qed

(* The same decomposition oriented from the word to its head and tail;
   declared as an introduction rule. *)
lemma build_split[intro]: "w = w 0 ## suffix 1 w"
by simp

(* The range of a prepended word is the range of the tail with the new
   head letter inserted. *)
lemma build_range[simp]: "range (a ## w) = insert a (range w)"
proof safe
show "(a ## w) i \<notin> range w \<Longrightarrow> (a ## w) i = a" for i
by (cases i) auto
show "a \<in> range (a ## w)"
proof (rule range_eqI)
show "a = (a ## w) 0" by simp
qed
show "w i \<in> range (a ## w)" for i
proof (rule range_eqI)
show "w i = (a ## w) (Suc i)" by simp
qed
qed

(* Prepending the letter at position i to the suffix from Suc i yields the
   suffix from i; obtained from suffix_subseq_join at i and Suc i. *)
lemma suffix_singleton_suffix[simp]: "w i ## suffix (Suc i) w = suffix i w"
using suffix_subseq_join[of i "Suc i" w]
by simp

text \<open>Find the first occurrence of a letter from a given set\<close>
(* If some letter of w lies in A, then w can be split as u, a single
   letter a from A, and a remainder v, such that no letter of u is in A.
   The split point i is chosen as the least position whose letter is in A,
   so every earlier position k < i carries a letter outside A. *)
lemma word_first_split_set:
assumes "A \<inter> range w \<noteq> {}"
obtains u a v where "w = u \<frown> [a] \<frown> v" "A \<inter> set u = {}" "a \<in> A"
proof -
define i where "i = (LEAST i. w i \<in> A)"
show ?thesis
proof
show "w = prefix i w \<frown> [w i] \<frown> suffix (Suc i) w"
by simp
show "A \<inter> set (prefix i w) = {}"
apply safe
subgoal premises prems for a
proof -
from prems obtain k where 3: "k < i" "w k = a"
by auto
have 4: "w k \<notin> A"
using not_less_Least 3(1) unfolding i_def .
show ?thesis
using prems(1) 3(2) 4 by auto
qed
done
show "w i \<in> A"
using LeastI assms(1) unfolding i_def by fast
qed
qed

subsection \<open>The limit set of an $\omega$-word\<close>

text \<open>
The limit set (also called infinity set) of an $\omega$-word
is the set of letters that appear infinitely often in the word.
This set plays an important role in defining acceptance conditions
of $\omega$-automata.
\<close>

(* limit x is the set of letters a for which x n = a holds for infinitely
   many positions n. *)
definition limit :: "'a word \<Rightarrow> 'a set"
where "limit x \<equiv> {a . \<exists>\<^sub>\<infinity>n . x n = a}"

(* Membership in the limit set, stated with the infinitely-often quantifier. *)
lemma limit_iff_frequent: "a \<in> limit x \<longleftrightarrow> (\<exists>\<^sub>\<infinity>n . x n = a)"
  by (simp add: limit_def)

text \<open>
The following is a different way to define the limit,
using the reverse image, making the laws about reverse
image applicable to the limit set.
(Might want to change the definition above?)
\<close>

(* Membership in the limit set, stated via the inverse image of the
   singleton {a} under x (inverse image operator: minus followed by backtick). *)
lemma limit_vimage: "(a \<in> limit x) = infinite (x -` {a})"
  by (simp add: limit_def Inf_many_def vimage_def)

(* Both a and b are in the limit of x iff a occurs at least once, every
   occurrence of a is followed later by an occurrence of b, and every
   occurrence of b is followed later by an occurrence of a.
   For the right-to-left direction, an induction on the bound m shows that
   a occurs at or after every position; the same for b follows from that. *)
lemma two_in_limit_iff:
"({a, b} \<subseteq> limit x) =
((\<exists>n. x n =a ) \<and> (\<forall>n. x n = a \<longrightarrow> (\<exists>m>n. x m = b)) \<and> (\<forall>m. x m = b \<longrightarrow> (\<exists>n>m. x n = a)))"
(is "?lhs = (?r1 \<and> ?r2 \<and> ?r3)")
proof
assume lhs: "?lhs"
hence 1: "?r1" by (auto simp: limit_def elim: INFM_EX)
from lhs have "\<forall>n. \<exists>m>n. x m = b" by (auto simp: limit_def INFM_nat)
hence 2: "?r2" by simp
from lhs have "\<forall>m. \<exists>n>m. x n = a" by (auto simp: limit_def INFM_nat)
hence 3: "?r3" by simp
from 1 2 3 show "?r1 \<and> ?r2 \<and> ?r3" by simp
next
assume "?r1 \<and> ?r2 \<and> ?r3"
hence 1: "?r1" and 2: "?r2" and 3: "?r3" by simp+
have infa: "\<forall>m. \<exists>n\<ge>m. x n = a"
proof
fix m
show "\<exists>n\<ge>m. x n = a" (is "?A m")
proof (induct m)
from 1 show "?A 0" by simp
next
fix m
assume ih: "?A m"
then obtain n where n: "n \<ge> m" "x n = a" by auto
with 2 obtain k where k: "k>n" "x k = b" by auto
with 3 obtain l where l: "l>k" "x l = a" by auto
from n k l have "l \<ge> Suc m" by auto
with l show "?A (Suc m)" by auto
qed
qed
hence infa': "\<exists>\<^sub>\<infinity>n. x n = a" by (simp add: INFM_nat_le)
have "\<forall>n. \<exists>m>n. x m = b"
proof
fix n
from infa obtain k where k1: "k\<ge>n" and k2: "x k = a" by auto
from 2 k2 obtain l where l1: "l>k" and l2: "x l = b" by auto
from k1 l1 have "l > n" by auto
with l2 show "\<exists>m>n. x m = b" by auto
qed
hence "\<exists>\<^sub>\<infinity>m. x m = b" by (simp add: INFM_nat)
with infa' show "?lhs" by (auto simp: limit_def)
qed

text \<open>
For $\omega$-words over a finite alphabet, the limit set is
non-empty. Moreover, from some position onward, any such word
contains only letters from its limit set.
\<close>

(* A word with finite range has a non-empty limit set: some letter of the
   range must have an infinite inverse image. *)
lemma limit_nonempty:
  assumes fin: "finite (range x)"
  shows "\<exists>a. a \<in> limit x"
proof -
  from fin obtain a where "a \<in> range x \<and> infinite (x -` {a})"
    by (rule inf_img_fin_domE) auto
  hence "a \<in> limit x"
    by (auto simp add: limit_vimage)
  thus ?thesis ..
qed

(* Elimination form of limit_nonempty. *)
lemmas limit_nonemptyE = limit_nonempty[THEN exE]

(* If the limit set meets S, then the word visits S infinitely often. *)
lemma limit_inter_INF:
  assumes hyp: "limit w \<inter> S \<noteq> {}"
  shows "\<exists>\<^sub>\<infinity> n. w n \<in> S"
proof -
  from hyp obtain x where "\<exists>\<^sub>\<infinity> n. w n = x" and "x \<in> S"
    by (auto simp add: limit_def)
  thus ?thesis
    by (auto elim: INFM_mono)
qed

text \<open>
The reverse implication is true only if $S$ is finite.
\<close>

(* If the word visits S infinitely often and only finitely many letters of
   S occur in the word, then some letter of S is in the limit set.
   By contradiction: otherwise the positions visiting S are a finite union
   of finite sets. *)
lemma INF_limit_inter:
  assumes hyp: "\<exists>\<^sub>\<infinity> n. w n \<in>  S"
    and fin: "finite (S \<inter> range w)"
  shows  "\<exists>a. a \<in> limit w \<inter> S"
proof (rule ccontr)
  assume contra: "\<not>(\<exists>a. a \<in> limit w \<inter> S)"
  hence "\<forall>a\<in>S. finite {n. w n = a}"
    by (auto simp add: limit_def Inf_many_def)
  with fin have "finite (UN a:S \<inter> range w. {n. w n = a})"
    by auto
  moreover
  have "(UN a:S \<inter> range w. {n. w n = a}) = {n. w n \<in> S}"
    by auto
  moreover
  note hyp
  ultimately show "False"
    by (simp add: Inf_many_def)
qed

(* For a finite set A, visiting A infinitely often is equivalent to the
   limit set meeting A. *)
lemma fin_ex_inf_eq_limit: "finite A \<Longrightarrow> (\<exists>\<^sub>\<infinity>i. w i \<in> A) \<longleftrightarrow> limit w \<inter> A \<noteq> {}"
by (metis INF_limit_inter equals0D finite_Int limit_inter_INF)

(* Every letter of the limit set occurs in every suffix of the word:
   a letter occurring infinitely often occurs at some position l > k,
   which is written as k + m to exhibit it in the suffix from k. *)
lemma limit_in_range_suffix: "limit x \<subseteq> range (suffix k x)"
proof
  fix a
  assume "a \<in> limit x"
  then obtain l where
    kl: "k < l" and xl: "x l = a"
    by (auto simp add: limit_def INFM_nat)
  from kl obtain m where "l = k+m"
    by (auto simp add: less_iff_Suc_add)
  with xl show "a \<in> range (suffix k x)"
    by auto
qed

(* The limit set is contained in the range of the word (suffix index 0). *)
lemma limit_in_range: "limit r \<subseteq> range r"
using limit_in_range_suffix[of r 0] by simp

(* Destruction form of limit_in_range_suffix. *)
lemmas limit_in_range_suffixD = limit_in_range_suffix[THEN subsetD]

(* The limit set is contained in the image of every upper interval {n..}
   under the word (image operator written with a backtick). *)
lemma limit_subset: "limit f \<subseteq> f ` {n..}"
  using limit_in_range_suffix[of f n] unfolding suffix_def by auto

(* For a word with finite range, the limit set is exactly the range of
   some suffix.  The non-trivial inclusion finds a bound k beyond which
   no letter outside the limit set occurs; the other inclusion is
   limit_in_range_suffix. *)
theorem limit_is_suffix:
  assumes fin: "finite (range x)"
  shows "\<exists>k. limit x = range (suffix k x)"
proof -
  have "\<exists>k. range (suffix k x) \<subseteq> limit x"
  proof -
    \<comment> "The set of letters that are not in the limit is certainly finite."
    from fin have "finite (range x - limit x)"
      by simp
    \<comment> "Moreover, any such letter occurs only finitely often"
    moreover
    have "\<forall>a \<in> range x - limit x. finite (x -` {a})"
      by (auto simp add: limit_vimage)
    \<comment> "Thus, there are only finitely many occurrences of such letters."
    ultimately have "finite (UN a : range x - limit x. x -` {a})"
      by (blast intro: finite_UN_I)
    \<comment> "Therefore these occurrences are within some initial interval."
    then obtain k where "(UN a : range x - limit x. x -` {a}) \<subseteq> {..<k}"
      by (blast dest: finite_nat_bounded)
    \<comment> "This is just the bound we are looking for."
    hence "\<forall>m. k \<le> m \<longrightarrow> x m \<in> limit x"
      by (auto simp add: limit_vimage)
    hence "range (suffix k x) \<subseteq> limit x"
      by auto
    thus ?thesis ..
  qed
  then obtain k where "range (suffix k x) \<subseteq> limit x" ..
  with limit_in_range_suffix
  have "limit x = range (suffix k x)"
    by (rule subset_antisym)
  thus ?thesis ..
qed

(* Elimination form of limit_is_suffix. *)
lemmas limit_is_suffixE = limit_is_suffix[THEN exE]

text \<open>
The limit set enjoys some simple algebraic laws with respect
to concatenation, suffixes, iteration, and renaming.
\<close>

(* Prefixing a finite word does not change the limit set.  Both directions
   transfer unboundedly many occurrence positions between x and the
   concatenation by shifting them by length w. *)
theorem limit_conc [simp]: "limit (w \<frown> x) = limit x"
proof (auto)
  fix a assume a: "a \<in> limit (w \<frown> x)"
  have "\<forall>m. \<exists>n. m<n \<and> x n = a"
  proof
    fix m
    from a obtain n where "m + length w < n \<and> (w \<frown> x) n = a"
      by (auto simp add: limit_def Inf_many_def infinite_nat_iff_unbounded)
    hence "m < n - length w \<and> x (n - length w) = a"
      by (auto simp add: conc_def)
    thus "\<exists>n. m<n \<and> x n = a" ..
  qed
  hence "infinite {n . x n = a}"
    by (simp add: infinite_nat_iff_unbounded)
  thus "a \<in> limit x"
    by (simp add: limit_def Inf_many_def)
next
  fix a assume a: "a \<in> limit x"
  have "\<forall>m. length w < m \<longrightarrow> (\<exists>n. m<n \<and> (w \<frown> x) n = a)"
  proof (clarify)
    fix m
    assume m: "length w < m"
    with a obtain n where "m - length w < n \<and> x n = a"
      by (auto simp add: limit_def Inf_many_def infinite_nat_iff_unbounded)
    with m have "m < n + length w \<and> (w \<frown> x) (n + length w) = a"
      by (simp add: conc_def, arith)
    thus "\<exists>n. m<n \<and> (w \<frown> x) n = a" ..
  qed
  hence "infinite {n . (w \<frown> x) n = a}"
    by (simp add: unbounded_k_infinite)
  thus "a \<in> limit (w \<frown> x)"
    by (simp add: limit_def Inf_many_def)
qed

(* Taking a suffix does not change the limit set: the word is its prefix
   followed by its suffix, and limit_conc discards the finite prefix. *)
theorem limit_suffix [simp]: "limit (suffix n x) = limit x"
proof -
  have "x = (prefix n x) \<frown> (suffix n x)"
    by (rule prefix_suffix)
  hence "limit x = limit (prefix n x \<frown> suffix n x)"
    by simp
  also have "\<dots> = limit (suffix n x)"
    by (rule limit_conc)
  finally show ?thesis
    by (rule sym)
qed

(* The limit set of the iteration of a non-empty word w is the set of
   letters of w.  For the second inclusion, a letter at index k of w
   recurs at every position (Suc m) * length w + k, and these positions
   are unbounded. *)
theorem limit_iter [simp]:
  assumes nempty: "0 < length w"
  shows "limit w\<^sup>\<omega> = set w"
proof
  have "limit w\<^sup>\<omega> \<subseteq> range w\<^sup>\<omega>"
    by (auto simp add: limit_def dest: INFM_EX)
  also from nempty have "\<dots> \<subseteq> set w"
    by auto
  finally show "limit w\<^sup>\<omega> \<subseteq> set w" .
next
  {
    fix a assume a: "a \<in> set w"
    then obtain k where k: "k < length w \<and> w!k = a"
      by (auto simp add: set_conv_nth)
    \<comment> "the following bound is terrible, but it simplifies the proof"
    from nempty k have "\<forall>m. w\<^sup>\<omega> ((Suc m)*(length w) + k) = a"
      by (simp add: mod_add_left_eq [symmetric])
    moreover
    \<comment> "why is the following so hard to prove??"
    have "\<forall>m. m < (Suc m)*(length w) + k"
    proof
      fix m
      from nempty have "1 \<le> length w" by arith
      hence "m*1 \<le> m*length w" by simp
      hence "m \<le> m*length w" by simp
      with nempty have "m < length w + (m*length w) + k" by arith
      thus "m < (Suc m)*(length w) + k" by simp
    qed
    moreover note nempty
    ultimately have "a \<in> limit w\<^sup>\<omega>"
      by (auto simp add: limit_iff_frequent INFM_nat)
  }
  then show "set w \<subseteq> limit w\<^sup>\<omega>" by auto
qed

(* Renaming preserves limit membership: if a occurs infinitely often in w,
   then f a occurs infinitely often in the renamed word. *)
lemma limit_o [simp]:
  assumes a: "a \<in> limit w"
  shows "f a \<in> limit (f \<circ> w)"
proof -
  from a
  have "\<exists>\<^sub>\<infinity>n. w n = a"
    by (simp only: limit_iff_frequent)
  hence "\<exists>\<^sub>\<infinity>n. f (w n) = f a"
    by (rule INFM_mono, simp)
  thus "f a \<in> limit (f \<circ> w)"
    by (simp only: limit_iff_frequent o_def)
qed

text \<open>
The converse relation is not true in general: $f(a)$ can be in the
limit of $f \circ w$ even though $a$ is not in the limit of $w$.
However, \<open>limit\<close> commutes with renaming if the function is
injective. More generally, if $f(a)$ is the image of only finitely
many elements, some of these must be in the limit of $w$.
\<close>

(* If x is in the limit of the renamed word and x has only finitely many
   pre-images under f, then one of those pre-images is in the limit of w.
   The inverse image of {x} under f is written with the minus-backtick
   operator. *)
lemma limit_o_inv:
  assumes fin: "finite (f -` {x})"
    and x: "x \<in> limit (f \<circ> w)"
  shows "\<exists>a \<in> (f -` {x}). a \<in> limit w"
proof (rule ccontr)
  assume contra: "\<not> ?thesis"
  \<comment> "hence, every element in the pre-image occurs only finitely often"
  then have "\<forall>a \<in> (f -` {x}). finite {n. w n = a}"
    by (simp add: limit_def Inf_many_def)
  \<comment> "so there are only finitely many occurrences of any such element"
  with fin have "finite (\<Union> a \<in> (f -` {x}). {n. w n = a})"
    by auto
  \<comment> \<open>these are precisely those positions where $x$ occurs in $f \circ w$\<close>
  moreover
  have "(\<Union> a \<in> (f -` {x}). {n. w n = a}) = {n. f(w n) = x}"
    by auto
  ultimately
  \<comment> "so $x$ can occur only finitely often in the translated word"
  have "finite {n. f(w n) = x}"
    by simp
  \<comment> \<open>\ldots\ which yields a contradiction\<close>
  with x show "False"
    by (simp add: limit_def Inf_many_def)
qed

(* For an injective renaming f, the limit of the renamed word is the image
   of the limit of w under f.  The second inclusion uses limit_o_inv, whose
   finiteness premise follows from injectivity. *)
theorem limit_inj [simp]:
  assumes inj: "inj f"
  shows "limit (f \<circ> w) = f ` (limit w)"
proof
  show "f ` limit w \<subseteq> limit (f \<circ> w)"
    by auto
  show "limit (f \<circ> w) \<subseteq> f ` limit w"
  proof
    fix x
    assume x: "x \<in> limit (f \<circ> w)"
    from inj have "finite (f -` {x})"
      by (blast intro: finite_vimageI)
    with x obtain a where a: "a \<in> (f -` {x}) \<and> a \<in> limit w"
      by (blast dest: limit_o_inv)
    thus "x \<in> f ` (limit w)"
      by auto
  qed
qed

(* If a word with finite range has a limit set disjoint from S, then from
   some position onward the word never visits S.  The position is the
   suffix index k provided by limit_is_suffix. *)
lemma limit_inter_empty:
assumes fin: "finite (range w)"
assumes hyp: "limit w \<inter> S = {}"
shows "\<forall>\<^sub>\<infinity>n. w n \<notin> S"
proof -
from fin obtain k where k_def: "limit w = range (suffix k w)"
using limit_is_suffix by blast
have "w (k + k') \<notin> S" for k'
using hyp unfolding k_def suffix_def image_def by blast
thus ?thesis
unfolding MOST_nat_le using le_Suc_ex by blast
qed

text \<open>If the limit is the range of some suffix of the sequence,
we may increase the suffix index arbitrarily.\<close>
(* If the limit equals the range of the suffix from i, it also equals the
   range of the suffix from any j >= i: the later suffix has a smaller
   range, and the limit is contained in the range of every suffix. *)
lemma limit_range_suffix_incr:
assumes "limit r = range (suffix i r)"
assumes "j\<ge>i"
shows "limit r = range (suffix j r)"
(is "?lhs = ?rhs")
proof -
have "?lhs = range (suffix i r)"
using assms by simp
moreover
have "\<dots> \<supseteq> ?rhs" using \<open>j\<ge>i\<close>
by (metis (mono_tags, lifting) assms(2)
image_subsetI le_Suc_ex range_eqI suffix_def suffix_suffix)
moreover
have "\<dots> \<supseteq> ?lhs" by (rule limit_in_range_suffix)
ultimately
show "?lhs = ?rhs"
by (metis antisym_conv limit_in_range_suffix)
qed

text \<open>For two sequences with finite range, we can find a common suffix index such
that both limits can be represented as the ranges of the suffixes at that index.\<close>
(* Elimination rule: two words with finite range share a suffix index i at
   which each limit is the range of the corresponding suffix.  The index
   used is the maximum of the two individual indices. *)
lemma common_range_limit:
assumes "finite (range x)"
and "finite (range y)"
obtains i where "limit x = range (suffix i x)"
and "limit y = range (suffix i y)"
proof -
obtain i j where 1: "limit x = range (suffix i x)"
and 2: "limit y = range (suffix j y)"
using assms limit_is_suffix by metis
have "limit x = range (suffix (max i j) x)"
and "limit y = range (suffix (max i j) y)"
using limit_range_suffix_incr[OF 1] limit_range_suffix_incr[OF 2]
by auto
thus ?thesis
using that by metis
qed

subsection \<open>Index sequences and piecewise definitions\<close>

text \<open>
A word can be defined piecewise: given a sequence of words $w_0, w_1, \ldots$
and a strictly increasing sequence of integers $i_0, i_1, \ldots$ where $i_0=0$,
a single word is obtained by concatenating subwords of the $w_n$ as given by
the integers: the resulting word is
$(w_0)_{i_0} \ldots (w_0)_{i_1-1} (w_1)_{i_1} \ldots (w_1)_{i_2-1} \ldots$
We prepare the field by proving some trivial facts about such sequences of
indexes.
\<close>

(* An index sequence is a sequence of naturals that starts at 0 and is
   strictly increasing from each element to the next. *)
definition idx_sequence :: "nat word \<Rightarrow> bool"
where "idx_sequence idx \<equiv> (idx 0 = 0) \<and> (\<forall>n. idx n < idx (Suc n))"

(* An index sequence is strictly increasing across any positive distance
   Suc k; by induction on k. *)
lemma idx_sequence_less:
  assumes iseq: "idx_sequence idx"
  shows "idx n < idx (Suc(n+k))"
proof (induct k)
  from iseq show "idx n < idx (Suc (n + 0))"
    by (simp add: idx_sequence_def)
next
  fix k
  assume ih: "idx n < idx (Suc(n+k))"
  from iseq have "idx (Suc(n+k)) < idx (Suc(n + Suc k))"
    by (simp add: idx_sequence_def)
  with ih show "idx n < idx (Suc(n + Suc k))"
    by (rule less_trans)
qed

(* An index sequence is injective.  If m and n differ, idx_sequence_less
   makes the two values differ strictly, contradicting the assumed
   equality; the remaining case m = n is closed by the proof method. *)
lemma idx_sequence_inj:
  assumes iseq: "idx_sequence idx"
    and eq: "idx m = idx n"
  shows "m = n"
proof (cases m n rule: linorder_cases)
  case greater
  then obtain k where "m = Suc(n+k)"
    by (auto simp add: less_iff_Suc_add)
  with iseq have "idx n < idx m"
    by (simp add: idx_sequence_less)
  with eq show ?thesis
    by simp
next
  case less
  then obtain k where "n = Suc(m+k)"
    by (auto simp add: less_iff_Suc_add)
  with iseq have "idx m < idx n"
    by (simp add: idx_sequence_less)
  with eq show ?thesis
    by simp
qed

(* An index sequence is monotone (weakly increasing): either m = n, or
   m < n and idx_sequence_less applies. *)
lemma idx_sequence_mono:
  assumes iseq: "idx_sequence idx"
    and m: "m \<le> n"
  shows "idx m \<le> idx n"
proof (cases "m=n")
  case True
  thus ?thesis by simp
next
  case False
  with m have "m < n" by simp
  then obtain k where "n = Suc(m+k)"
    by (auto simp add: less_iff_Suc_add)
  with iseq have "idx m < idx n"
    by (simp add: idx_sequence_less)
  thus ?thesis by simp
qed

text \<open>
Given an index sequence, every natural number is contained in the
interval defined by two adjacent indexes, and in fact this interval
is determined uniquely.
\<close>

(* Each index lies in the half-open interval that it opens. *)
lemma idx_sequence_idx:
  assumes "idx_sequence idx"
  shows "idx k \<in> {idx k ..< idx (Suc k)}"
  using assms unfolding idx_sequence_def by auto

(* Every natural number n lies in some interval between two adjacent
   indexes.  By induction on n: moving from n to Suc n either stays in
   the same interval k or lands exactly on the start of interval Suc k. *)
lemma idx_sequence_interval:
  assumes iseq: "idx_sequence idx"
  shows "\<exists>k. n \<in> {idx k ..< idx (Suc k) }"
    (is "?P n" is "\<exists>k. ?in n k")
proof (induct n)
  from iseq have "0 = idx 0"
    by (simp add: idx_sequence_def)
  moreover
  from iseq have "idx 0 \<in> {idx 0 ..< idx (Suc 0) }"
    by (rule idx_sequence_idx)
  ultimately
  show "?P 0" by auto
next
  fix n
  assume "?P n"
  then obtain k where k: "?in n k" ..
  show "?P (Suc n)"
  proof (cases "Suc n < idx (Suc k)")
    case True
    with k have "?in (Suc n) k"
      by simp
    thus ?thesis ..
  next
    case False
    with k have "Suc n = idx (Suc k)"
      by auto
    with iseq have "?in (Suc n) (Suc k)"
      by (simp add: idx_sequence_def)
    thus ?thesis ..
  qed
qed

(* A number n lies in at most one interval between adjacent indexes.
   If k and m differ, monotonicity of the index sequence puts the end of
   the earlier interval at or below n, contradicting membership in it;
   the remaining case k = m is closed by the proof method. *)
lemma idx_sequence_interval_unique:
assumes iseq: "idx_sequence idx"
and k: "n \<in> {idx k ..< idx (Suc k)}"
and m: "n \<in> {idx m ..< idx (Suc m)}"
shows "k = m"
proof (cases k m rule: linorder_cases)
case less
hence "Suc k \<le> m" by simp
with iseq have "idx (Suc k) \<le> idx m"
by (rule idx_sequence_mono)
with m have "idx (Suc k) \<le> n"
by auto
with k have "False"
by simp
thus ?thesis ..
next
case greater
hence "Suc m \<le> k" by simp
with iseq have "idx (Suc m) \<le> idx k"
by (rule idx_sequence_mono)
with k have "idx (Suc m) \<le> n"
by auto
with m have "False"
by simp
thus ?thesis ..
qed

(* Existence and uniqueness combined: every n lies in exactly one interval
   between adjacent indexes. *)
lemma idx_sequence_unique_interval:
assumes iseq: "idx_sequence idx"
shows "\<exists>! k. n \<in> {idx k ..< idx (Suc k) }"
proof (rule ex_ex1I)
from iseq show "\<exists>k. n \<in> {idx k ..< idx (Suc k)}"
by (rule idx_sequence_interval)
next
fix k y
assume "n \<in> {idx k..<idx (Suc k)}" and "n \<in> {idx y..<idx (Suc y)}"
with iseq show "k = y" by (auto elim: idx_sequence_interval_unique)
qed

text \<open>
Now we can define the piecewise construction of a word using
an index sequence.
\<close>

(* Piecewise word: at position n, select (via the definite description THE)
   the index i whose interval between idx i and idx (Suc i) contains n,
   and read the i-th word ws i at position n. *)
definition merge :: "'a word word \<Rightarrow> nat word \<Rightarrow> 'a word"
where "merge ws idx \<equiv> \<lambda>n. let i = THE i. n \<in> {idx i ..< idx (Suc i) } in ws i n"

(* Characteristic equation of merge: for an index sequence, if n lies in
   the i-th interval then merge reads from the i-th word.  The definite
   description is resolved using uniqueness of the interval. *)
lemma merge:
  assumes idx: "idx_sequence idx"
    and n: "n \<in> {idx i ..< idx (Suc i)}"
  shows "merge ws idx n = ws i n"
proof -
  from n have "(THE k. n \<in> {idx k ..< idx (Suc k) }) = i"
    by (rule the_equality[OF _ sym[OF idx_sequence_interval_unique[OF idx n]]]) simp
  thus ?thesis
    by (simp add: merge_def Let_def)
qed

(* At position 0, merge reads from the first word: position 0 lies in the
   interval opened by idx 0, since idx 0 = 0 and idx 0 < idx (Suc 0). *)
lemma merge0:
  assumes idx: "idx_sequence idx"
  shows "merge ws idx 0 = ws 0 0"
proof (rule merge[OF idx])
  from idx have "idx 0 < idx (Suc 0)"
    unfolding idx_sequence_def by blast
  with idx show "0 \<in> {idx 0 ..< idx (Suc 0)}"
    by (simp add: idx_sequence_def)
qed

lemma merge_Suc:
assumes idx: "idx_sequence idx"
and n: "n \<in> {idx i ..< idx (Suc i)}"
shows "merge ws idx (Suc n) = (if Suc n = idx (Suc i) then ws (Suc i) else ws i) (Suc n)"
proof auto
assume eq: "Suc n = idx (Suc i)"
from idx have "idx (Suc i) < idx (Suc(Suc i))"
unfolding idx_sequence_def by blast
with eq idx show "merge ws idx (idx (Suc i)) = ws (Suc i) (idx (Suc i))"