src/HOL/String.thy
author haftmann
Mon Sep 26 07:56:54 2016 +0200 (2016-09-26)
changeset 63950 cdc1e59aa513
parent 62678 843ff6f1de38
child 64630 96015aecfeba
permissions -rw-r--r--
syntactic type class for operation mod named after mod;
simplified assumptions of type class semiring_div
     1 (* Author: Tobias Nipkow, Florian Haftmann, TU Muenchen *)
     2 
     3 section \<open>Character and string types\<close>
     4 
     5 theory String
     6 imports Enum
     7 begin
     8 
     9 subsection \<open>Characters and strings\<close>
    10 
    11 subsubsection \<open>Characters as finite algebraic type\<close>
    12 
    13 typedef char = "{n::nat. n < 256}"
    14   morphisms nat_of_char Abs_char
    15 proof
    16   show "Suc 0 \<in> {n. n < 256}" by simp
    17 qed
    18 
    19 definition char_of_nat :: "nat \<Rightarrow> char"
    20 where
    21   "char_of_nat n = Abs_char (n mod 256)"
    22 
    23 lemma char_cases [case_names char_of_nat, cases type: char]:
    24   "(\<And>n. c = char_of_nat n \<Longrightarrow> n < 256 \<Longrightarrow> P) \<Longrightarrow> P"
    25   by (cases c) (simp add: char_of_nat_def)
    26 
    27 lemma char_of_nat_of_char [simp]:
    28   "char_of_nat (nat_of_char c) = c"
    29   by (cases c) (simp add: char_of_nat_def Abs_char_inject Abs_char_inverse)
    30 
    31 lemma inj_nat_of_char:
    32   "inj nat_of_char"
    33 proof (rule injI)
    34   fix c d
    35   assume "nat_of_char c = nat_of_char d"
    36   then have "char_of_nat (nat_of_char c) = char_of_nat (nat_of_char d)"
    37     by simp
    38   then show "c = d"
    39     by simp
    40 qed
    41   
    42 lemma nat_of_char_eq_iff [simp]:
    43   "nat_of_char c = nat_of_char d \<longleftrightarrow> c = d"
    44   by (fact nat_of_char_inject)
    45 
    46 lemma nat_of_char_of_nat [simp]:
    47   "nat_of_char (char_of_nat n) = n mod 256"
    48   by (cases "char_of_nat n") (simp add: char_of_nat_def Abs_char_inject Abs_char_inverse)
    49 
    50 lemma char_of_nat_mod_256 [simp]:
    51   "char_of_nat (n mod 256) = char_of_nat n"
    52   by (cases "char_of_nat (n mod 256)") (simp add: char_of_nat_def)
    53 
    54 lemma char_of_nat_quasi_inj [simp]:
    55   "char_of_nat m = char_of_nat n \<longleftrightarrow> m mod 256 = n mod 256"
    56   by (simp add: char_of_nat_def Abs_char_inject)
    57 
    58 lemma inj_on_char_of_nat [simp]:
    59   "inj_on char_of_nat {..<256}"
    60   by (rule inj_onI) simp
    61 
    62 lemma nat_of_char_mod_256 [simp]:
    63   "nat_of_char c mod 256 = nat_of_char c"
    64   by (cases c) simp
    65 
    66 lemma nat_of_char_less_256 [simp]:
    67   "nat_of_char c < 256"
    68 proof -
    69   have "nat_of_char c mod 256 < 256"
    70     by arith
    71   then show ?thesis by simp
    72 qed
    73 
    74 lemma UNIV_char_of_nat:
    75   "UNIV = char_of_nat ` {..<256}"
    76 proof -
    77   { fix c
    78     have "c \<in> char_of_nat ` {..<256}"
    79       by (cases c) auto
    80   } then show ?thesis by auto
    81 qed
    82 
    83 lemma card_UNIV_char:
    84   "card (UNIV :: char set) = 256"
    85   by (auto simp add: UNIV_char_of_nat card_image)
    86 
    87 lemma range_nat_of_char:
    88   "range nat_of_char = {..<256}"
    89   by (auto simp add: UNIV_char_of_nat image_image image_def)
    90 
    91 
    92 subsubsection \<open>Character literals as variant of numerals\<close>
    93 
    94 instantiation char :: zero
    95 begin
    96 
    97 definition zero_char :: char
    98   where "0 = char_of_nat 0"
    99 
   100 instance ..
   101 
   102 end
   103 
   104 definition Char :: "num \<Rightarrow> char"
   105   where "Char k = char_of_nat (numeral k)"
   106 
   107 code_datatype "0 :: char" Char
   108 
   109 lemma nat_of_char_zero [simp]:
   110   "nat_of_char 0 = 0"
   111   by (simp add: zero_char_def)
   112 
   113 lemma nat_of_char_Char [simp]:
   114   "nat_of_char (Char k) = numeral k mod 256"
   115   by (simp add: Char_def)
   116 
   117 lemma Char_eq_Char_iff [simp]:
   118   "Char k = Char l \<longleftrightarrow> numeral k mod (256 :: nat) = numeral l mod 256" (is "?P \<longleftrightarrow> ?Q")
   119 proof -
   120   have "?P \<longleftrightarrow> nat_of_char (Char k) = nat_of_char (Char l)"
   121     by (rule sym, rule inj_eq) (fact inj_nat_of_char)
   122   also have "nat_of_char (Char k) = nat_of_char (Char l) \<longleftrightarrow> ?Q"
   123     by (simp only: Char_def nat_of_char_of_nat)
   124   finally show ?thesis .
   125 qed
   126 
   127 lemma zero_eq_Char_iff [simp]:
   128   "0 = Char k \<longleftrightarrow> numeral k mod (256 :: nat) = 0"
   129   by (auto simp add: zero_char_def Char_def)
   130 
   131 lemma Char_eq_zero_iff [simp]:
   132   "Char k = 0 \<longleftrightarrow> numeral k mod (256 :: nat) = 0"
   133   by (auto simp add: zero_char_def Char_def) 
   134 
   135 definition integer_of_char :: "char \<Rightarrow> integer"
   136 where
   137   "integer_of_char = integer_of_nat \<circ> nat_of_char"
   138 
   139 definition char_of_integer :: "integer \<Rightarrow> char"
   140 where
   141   "char_of_integer = char_of_nat \<circ> nat_of_integer"
   142 
   143 lemma integer_of_char_zero [simp, code]:
   144   "integer_of_char 0 = 0"
   145   by (simp add: integer_of_char_def integer_of_nat_def)
   146 
   147 lemma integer_of_char_Char [simp]:
   148   "integer_of_char (Char k) = numeral k mod 256"
   149   by (simp only: integer_of_char_def integer_of_nat_def comp_apply nat_of_char_Char map_fun_def
   150     id_apply zmod_int modulo_integer.abs_eq [symmetric]) simp
   151 
   152 lemma less_256_integer_of_char_Char:
   153   assumes "numeral k < (256 :: integer)"
   154   shows "integer_of_char (Char k) = numeral k"
   155 proof -
   156   have "numeral k mod 256 = (numeral k :: integer)"
   157     by (rule semiring_numeral_div_class.mod_less) (insert assms, simp_all)
   158   then show ?thesis using integer_of_char_Char [of k]
   159     by (simp only:)
   160 qed
   161 
   162 setup \<open>
   163 let
   164   val chars = map_range (Thm.cterm_of @{context} o HOLogic.mk_numeral o curry (op +) 1) 255;
   165   val simpset =
   166     put_simpset HOL_ss @{context}
   167       addsimps @{thms numeral_less_iff less_num_simps};
   168   fun mk_code_eqn ct =
   169     Thm.instantiate' [] [SOME ct] @{thm less_256_integer_of_char_Char}
   170     |> full_simplify simpset;
   171   val code_eqns = map mk_code_eqn chars;
   172 in
   173   Global_Theory.note_thmss ""
   174     [((@{binding integer_of_char_simps}, []), [(code_eqns, [])])]
   175   #> snd
   176 end
   177 \<close>
   178 
   179 declare integer_of_char_simps [code]
   180 
   181 lemma nat_of_char_code [code]:
   182   "nat_of_char = nat_of_integer \<circ> integer_of_char"
   183   by (simp add: integer_of_char_def fun_eq_iff)
   184 
   185 lemma char_of_nat_code [code]:
   186   "char_of_nat = char_of_integer \<circ> integer_of_nat"
   187   by (simp add: char_of_integer_def fun_eq_iff)
   188 
   189 instantiation char :: equal
   190 begin
   191 
   192 definition equal_char
   193   where "equal_char (c :: char) d \<longleftrightarrow> c = d"
   194 
   195 instance
   196   by standard (simp add: equal_char_def)
   197 
   198 end
   199 
   200 lemma equal_char_simps [code]:
   201   "HOL.equal (0::char) 0 \<longleftrightarrow> True"
   202   "HOL.equal (Char k) (Char l) \<longleftrightarrow> HOL.equal (numeral k mod 256 :: nat) (numeral l mod 256)"
   203   "HOL.equal 0 (Char k) \<longleftrightarrow> HOL.equal (numeral k mod 256 :: nat) 0"
   204   "HOL.equal (Char k) 0 \<longleftrightarrow> HOL.equal (numeral k mod 256 :: nat) 0"
   205   by (simp_all only: equal refl Char_eq_Char_iff zero_eq_Char_iff Char_eq_zero_iff)
   206 
   207 syntax
   208   "_Char" :: "str_position \<Rightarrow> char"    ("CHR _")
   209   "_Char_ord" :: "num_const \<Rightarrow> char"   ("CHR _")
   210 
   211 type_synonym string = "char list"
   212 
   213 syntax
   214   "_String" :: "str_position => string"    ("_")
   215 
   216 ML_file "Tools/string_syntax.ML"
   217 
   218 instantiation char :: enum
   219 begin
   220 
   221 definition
   222   "Enum.enum = [0, CHR 0x01, CHR 0x02, CHR 0x03,
   223     CHR 0x04, CHR 0x05, CHR 0x06, CHR 0x07,
   224     CHR 0x08, CHR 0x09, CHR ''\<newline>'', CHR 0x0B,
   225     CHR 0x0C, CHR 0x0D, CHR 0x0E, CHR 0x0F,
   226     CHR 0x10, CHR 0x11, CHR 0x12, CHR 0x13,
   227     CHR 0x14, CHR 0x15, CHR 0x16, CHR 0x17,
   228     CHR 0x18, CHR 0x19, CHR 0x1A, CHR 0x1B,
   229     CHR 0x1C, CHR 0x1D, CHR 0x1E, CHR 0x1F,
   230     CHR '' '', CHR ''!'', CHR 0x22, CHR ''#'',
   231     CHR ''$'', CHR ''%'', CHR ''&'', CHR 0x27,
   232     CHR ''('', CHR '')'', CHR ''*'', CHR ''+'',
   233     CHR '','', CHR ''-'', CHR ''.'', CHR ''/'',
   234     CHR ''0'', CHR ''1'', CHR ''2'', CHR ''3'',
   235     CHR ''4'', CHR ''5'', CHR ''6'', CHR ''7'',
   236     CHR ''8'', CHR ''9'', CHR '':'', CHR '';'',
   237     CHR ''<'', CHR ''='', CHR ''>'', CHR ''?'',
   238     CHR ''@'', CHR ''A'', CHR ''B'', CHR ''C'',
   239     CHR ''D'', CHR ''E'', CHR ''F'', CHR ''G'',
   240     CHR ''H'', CHR ''I'', CHR ''J'', CHR ''K'',
   241     CHR ''L'', CHR ''M'', CHR ''N'', CHR ''O'',
   242     CHR ''P'', CHR ''Q'', CHR ''R'', CHR ''S'',
   243     CHR ''T'', CHR ''U'', CHR ''V'', CHR ''W'',
   244     CHR ''X'', CHR ''Y'', CHR ''Z'', CHR ''['',
   245     CHR 0x5C, CHR '']'', CHR ''^'', CHR ''_'',
   246     CHR 0x60, CHR ''a'', CHR ''b'', CHR ''c'',
   247     CHR ''d'', CHR ''e'', CHR ''f'', CHR ''g'',
   248     CHR ''h'', CHR ''i'', CHR ''j'', CHR ''k'',
   249     CHR ''l'', CHR ''m'', CHR ''n'', CHR ''o'',
   250     CHR ''p'', CHR ''q'', CHR ''r'', CHR ''s'',
   251     CHR ''t'', CHR ''u'', CHR ''v'', CHR ''w'',
   252     CHR ''x'', CHR ''y'', CHR ''z'', CHR ''{'',
   253     CHR ''|'', CHR ''}'', CHR ''~'', CHR 0x7F,
   254     CHR 0x80, CHR 0x81, CHR 0x82, CHR 0x83,
   255     CHR 0x84, CHR 0x85, CHR 0x86, CHR 0x87,
   256     CHR 0x88, CHR 0x89, CHR 0x8A, CHR 0x8B,
   257     CHR 0x8C, CHR 0x8D, CHR 0x8E, CHR 0x8F,
   258     CHR 0x90, CHR 0x91, CHR 0x92, CHR 0x93,
   259     CHR 0x94, CHR 0x95, CHR 0x96, CHR 0x97,
   260     CHR 0x98, CHR 0x99, CHR 0x9A, CHR 0x9B,
   261     CHR 0x9C, CHR 0x9D, CHR 0x9E, CHR 0x9F,
   262     CHR 0xA0, CHR 0xA1, CHR 0xA2, CHR 0xA3,
   263     CHR 0xA4, CHR 0xA5, CHR 0xA6, CHR 0xA7,
   264     CHR 0xA8, CHR 0xA9, CHR 0xAA, CHR 0xAB,
   265     CHR 0xAC, CHR 0xAD, CHR 0xAE, CHR 0xAF,
   266     CHR 0xB0, CHR 0xB1, CHR 0xB2, CHR 0xB3,
   267     CHR 0xB4, CHR 0xB5, CHR 0xB6, CHR 0xB7,
   268     CHR 0xB8, CHR 0xB9, CHR 0xBA, CHR 0xBB,
   269     CHR 0xBC, CHR 0xBD, CHR 0xBE, CHR 0xBF,
   270     CHR 0xC0, CHR 0xC1, CHR 0xC2, CHR 0xC3,
   271     CHR 0xC4, CHR 0xC5, CHR 0xC6, CHR 0xC7,
   272     CHR 0xC8, CHR 0xC9, CHR 0xCA, CHR 0xCB,
   273     CHR 0xCC, CHR 0xCD, CHR 0xCE, CHR 0xCF,
   274     CHR 0xD0, CHR 0xD1, CHR 0xD2, CHR 0xD3,
   275     CHR 0xD4, CHR 0xD5, CHR 0xD6, CHR 0xD7,
   276     CHR 0xD8, CHR 0xD9, CHR 0xDA, CHR 0xDB,
   277     CHR 0xDC, CHR 0xDD, CHR 0xDE, CHR 0xDF,
   278     CHR 0xE0, CHR 0xE1, CHR 0xE2, CHR 0xE3,
   279     CHR 0xE4, CHR 0xE5, CHR 0xE6, CHR 0xE7,
   280     CHR 0xE8, CHR 0xE9, CHR 0xEA, CHR 0xEB,
   281     CHR 0xEC, CHR 0xED, CHR 0xEE, CHR 0xEF,
   282     CHR 0xF0, CHR 0xF1, CHR 0xF2, CHR 0xF3,
   283     CHR 0xF4, CHR 0xF5, CHR 0xF6, CHR 0xF7,
   284     CHR 0xF8, CHR 0xF9, CHR 0xFA, CHR 0xFB,
   285     CHR 0xFC, CHR 0xFD, CHR 0xFE, CHR 0xFF]"
   286 
   287 definition
   288   "Enum.enum_all P \<longleftrightarrow> list_all P (Enum.enum :: char list)"
   289 
   290 definition
   291   "Enum.enum_ex P \<longleftrightarrow> list_ex P (Enum.enum :: char list)"
   292 
   293 lemma enum_char_unfold:
   294   "Enum.enum = map char_of_nat [0..<256]"
   295 proof -
   296   have *: "Suc (Suc 0) = 2" by simp
   297   have "map nat_of_char Enum.enum = [0..<256]"
   298     by (simp add: enum_char_def upt_conv_Cons_Cons *)
   299   then have "map char_of_nat (map nat_of_char Enum.enum) =
   300     map char_of_nat [0..<256]" by simp
   301   then show ?thesis by (simp add: comp_def)
   302 qed
   303 
   304 instance proof
   305   show UNIV: "UNIV = set (Enum.enum :: char list)"
   306     by (simp add: enum_char_unfold UNIV_char_of_nat atLeast0LessThan)
   307   show "distinct (Enum.enum :: char list)"
   308     by (auto simp add: enum_char_unfold distinct_map intro: inj_onI)
   309   show "\<And>P. Enum.enum_all P \<longleftrightarrow> Ball (UNIV :: char set) P"
   310     by (simp add: UNIV enum_all_char_def list_all_iff)
   311   show "\<And>P. Enum.enum_ex P \<longleftrightarrow> Bex (UNIV :: char set) P"
   312     by (simp add: UNIV enum_ex_char_def list_ex_iff)
   313 qed
   314 
   315 end
   316 
   317 lemma char_of_integer_code [code]:
   318   "char_of_integer n = Enum.enum ! (nat_of_integer n mod 256)"
   319   by (simp add: char_of_integer_def enum_char_unfold)
   320 
   321 
   322 subsection \<open>Strings as dedicated type\<close>
   323 
   324 typedef literal = "UNIV :: string set"
   325   morphisms explode STR ..
   326 
   327 setup_lifting type_definition_literal
   328 
   329 lemma STR_inject' [simp]:
   330   "STR s = STR t \<longleftrightarrow> s = t"
   331   by transfer rule
   332 
   333 definition implode :: "string \<Rightarrow> String.literal"
   334 where
   335   [code del]: "implode = STR"
   336   
   337 instantiation literal :: size
   338 begin
   339 
   340 definition size_literal :: "literal \<Rightarrow> nat"
   341 where
   342   [code]: "size_literal (s::literal) = 0"
   343 
   344 instance ..
   345 
   346 end
   347 
   348 instantiation literal :: equal
   349 begin
   350 
   351 lift_definition equal_literal :: "literal \<Rightarrow> literal \<Rightarrow> bool" is "op =" .
   352 
   353 instance by intro_classes (transfer, simp)
   354 
   355 end
   356 
   357 declare equal_literal.rep_eq[code]
   358 
   359 lemma [code nbe]:
   360   fixes s :: "String.literal"
   361   shows "HOL.equal s s \<longleftrightarrow> True"
   362   by (simp add: equal)
   363 
   364 lifting_update literal.lifting
   365 lifting_forget literal.lifting
   366 
   367 subsection \<open>Code generator\<close>
   368 
   369 ML_file "Tools/string_code.ML"
   370 
   371 code_reserved SML string
   372 code_reserved OCaml string
   373 code_reserved Scala string
   374 
   375 code_printing
   376   type_constructor literal \<rightharpoonup>
   377     (SML) "string"
   378     and (OCaml) "string"
   379     and (Haskell) "String"
   380     and (Scala) "String"
   381 
   382 setup \<open>
   383   fold String_Code.add_literal_string ["SML", "OCaml", "Haskell", "Scala"]
   384 \<close>
   385 
   386 code_printing
   387   class_instance literal :: equal \<rightharpoonup>
   388     (Haskell) -
   389 | constant "HOL.equal :: literal \<Rightarrow> literal \<Rightarrow> bool" \<rightharpoonup>
   390     (SML) "!((_ : string) = _)"
   391     and (OCaml) "!((_ : string) = _)"
   392     and (Haskell) infix 4 "=="
   393     and (Scala) infixl 5 "=="
   394 
   395 setup \<open>Sign.map_naming (Name_Space.mandatory_path "Code")\<close>
   396 
   397 definition abort :: "literal \<Rightarrow> (unit \<Rightarrow> 'a) \<Rightarrow> 'a"
   398 where [simp, code del]: "abort _ f = f ()"
   399 
   400 lemma abort_cong: "msg = msg' ==> Code.abort msg f = Code.abort msg' f"
   401 by simp
   402 
   403 setup \<open>Sign.map_naming Name_Space.parent_path\<close>
   404 
   405 setup \<open>Code_Simp.map_ss (Simplifier.add_cong @{thm Code.abort_cong})\<close>
   406 
   407 code_printing constant Code.abort \<rightharpoonup>
   408     (SML) "!(raise/ Fail/ _)"
   409     and (OCaml) "failwith"
   410     and (Haskell) "!(error/ ::/ forall a./ String -> (() -> a) -> a)"
   411     and (Scala) "!{/ sys.error((_));/  ((_)).apply(())/ }"
   412 
   413 hide_type (open) literal
   414 
   415 hide_const (open) implode explode
   416 
   417 end