src/HOL/String.thy
author haftmann
Mon Jun 05 15:59:41 2017 +0200 (2017-06-05)
changeset 66010 2f7d39285a1a
parent 64994 6e4c05e8edbb
child 66190 a41435469559
permissions -rw-r--r--
executable domain membership checks
     1 (* Author: Tobias Nipkow, Florian Haftmann, TU Muenchen *)
     2 
     3 section \<open>Character and string types\<close>
     4 
     5 theory String
     6 imports Enum
     7 begin
     8 
     9 subsection \<open>Characters and strings\<close>
    10 
    11 subsubsection \<open>Characters as finite algebraic type\<close>
    12 
    13 typedef char = "{n::nat. n < 256}"
    14   morphisms nat_of_char Abs_char
    15 proof
    16   show "Suc 0 \<in> {n. n < 256}" by simp
    17 qed
    18 
    19 definition char_of_nat :: "nat \<Rightarrow> char"
    20 where
    21   "char_of_nat n = Abs_char (n mod 256)"
    22 
    23 lemma char_cases [case_names char_of_nat, cases type: char]:
    24   "(\<And>n. c = char_of_nat n \<Longrightarrow> n < 256 \<Longrightarrow> P) \<Longrightarrow> P"
    25   by (cases c) (simp add: char_of_nat_def)
    26 
    27 lemma char_of_nat_of_char [simp]:
    28   "char_of_nat (nat_of_char c) = c"
    29   by (cases c) (simp add: char_of_nat_def Abs_char_inject Abs_char_inverse)
    30 
    31 lemma inj_nat_of_char:
    32   "inj nat_of_char"
    33 proof (rule injI)
    34   fix c d
    35   assume "nat_of_char c = nat_of_char d"
    36   then have "char_of_nat (nat_of_char c) = char_of_nat (nat_of_char d)"
    37     by simp
    38   then show "c = d"
    39     by simp
    40 qed
    41   
    42 lemma nat_of_char_eq_iff [simp]:
    43   "nat_of_char c = nat_of_char d \<longleftrightarrow> c = d"
    44   by (fact nat_of_char_inject)
    45 
    46 lemma nat_of_char_of_nat [simp]:
    47   "nat_of_char (char_of_nat n) = n mod 256"
    48   by (cases "char_of_nat n") (simp add: char_of_nat_def Abs_char_inject Abs_char_inverse)
    49 
    50 lemma char_of_nat_mod_256 [simp]:
    51   "char_of_nat (n mod 256) = char_of_nat n"
    52   by (cases "char_of_nat (n mod 256)") (simp add: char_of_nat_def)
    53 
    54 lemma char_of_nat_quasi_inj [simp]:
    55   "char_of_nat m = char_of_nat n \<longleftrightarrow> m mod 256 = n mod 256"
    56   by (simp add: char_of_nat_def Abs_char_inject)
    57 
    58 lemma inj_on_char_of_nat [simp]:
    59   "inj_on char_of_nat {..<256}"
    60   by (rule inj_onI) simp
    61 
    62 lemma nat_of_char_mod_256 [simp]:
    63   "nat_of_char c mod 256 = nat_of_char c"
    64   by (cases c) simp
    65 
    66 lemma nat_of_char_less_256 [simp]:
    67   "nat_of_char c < 256"
    68 proof -
    69   have "nat_of_char c mod 256 < 256"
    70     by arith
    71   then show ?thesis by simp
    72 qed
    73 
    74 lemma UNIV_char_of_nat:
    75   "UNIV = char_of_nat ` {..<256}"
    76 proof -
    77   { fix c
    78     have "c \<in> char_of_nat ` {..<256}"
    79       by (cases c) auto
    80   } then show ?thesis by auto
    81 qed
    82 
    83 lemma card_UNIV_char:
    84   "card (UNIV :: char set) = 256"
    85   by (auto simp add: UNIV_char_of_nat card_image)
    86 
    87 lemma range_nat_of_char:
    88   "range nat_of_char = {..<256}"
    89   by (auto simp add: UNIV_char_of_nat image_image image_def)
    90 
    91 
    92 subsubsection \<open>Character literals as variant of numerals\<close>
    93 
    94 instantiation char :: zero
    95 begin
    96 
    97 definition zero_char :: char
    98   where "0 = char_of_nat 0"
    99 
   100 instance ..
   101 
   102 end
   103 
   104 definition Char :: "num \<Rightarrow> char"
   105   where "Char k = char_of_nat (numeral k)"
   106 
   107 code_datatype "0 :: char" Char
   108 
   109 lemma nat_of_char_zero [simp]:
   110   "nat_of_char 0 = 0"
   111   by (simp add: zero_char_def)
   112 
   113 lemma nat_of_char_Char [simp]:
   114   "nat_of_char (Char k) = numeral k mod 256"
   115   by (simp add: Char_def)
   116 
   117 lemma Char_eq_Char_iff:
   118   "Char k = Char l \<longleftrightarrow> numeral k mod (256 :: nat) = numeral l mod 256" (is "?P \<longleftrightarrow> ?Q")
   119 proof -
   120   have "?P \<longleftrightarrow> nat_of_char (Char k) = nat_of_char (Char l)"
   121     by (rule sym, rule inj_eq) (fact inj_nat_of_char)
   122   also have "nat_of_char (Char k) = nat_of_char (Char l) \<longleftrightarrow> ?Q"
   123     by (simp only: Char_def nat_of_char_of_nat)
   124   finally show ?thesis .
   125 qed
   126 
   127 lemma zero_eq_Char_iff:
   128   "0 = Char k \<longleftrightarrow> numeral k mod (256 :: nat) = 0"
   129   by (auto simp add: zero_char_def Char_def)
   130 
   131 lemma Char_eq_zero_iff:
   132   "Char k = 0 \<longleftrightarrow> numeral k mod (256 :: nat) = 0"
   133   by (auto simp add: zero_char_def Char_def) 
   134 
   135 simproc_setup char_eq
   136   ("Char m = Char n" | "Char m = 0" | "0 = Char n") = \<open>
   137   let
   138     val ss = put_simpset HOL_ss @{context}
   139       |> fold Simplifier.add_simp @{thms Char_eq_Char_iff zero_eq_Char_iff Char_eq_zero_iff cong_exp_iff_simps}
   140       |> simpset_of 
   141   in
   142     fn _ => fn ctxt => SOME o Simplifier.rewrite (put_simpset ss ctxt)
   143   end
   144 \<close>
   145 
   146 definition integer_of_char :: "char \<Rightarrow> integer"
   147 where
   148   "integer_of_char = integer_of_nat \<circ> nat_of_char"
   149 
   150 definition char_of_integer :: "integer \<Rightarrow> char"
   151 where
   152   "char_of_integer = char_of_nat \<circ> nat_of_integer"
   153 
   154 lemma integer_of_char_zero [simp, code]:
   155   "integer_of_char 0 = 0"
   156   by (simp add: integer_of_char_def integer_of_nat_def)
   157 
   158 lemma integer_of_char_Char [simp]:
   159   "integer_of_char (Char k) = numeral k mod 256"
   160   by (simp only: integer_of_char_def integer_of_nat_def comp_apply nat_of_char_Char map_fun_def
   161     id_apply zmod_int modulo_integer.abs_eq [symmetric]) simp
   162 
   163 lemma less_256_integer_of_char_Char:
   164   assumes "numeral k < (256 :: integer)"
   165   shows "integer_of_char (Char k) = numeral k"
   166 proof -
   167   have "numeral k mod 256 = (numeral k :: integer)"
   168     by (rule semiring_numeral_div_class.mod_less) (insert assms, simp_all)
   169   then show ?thesis using integer_of_char_Char [of k]
   170     by (simp only:)
   171 qed
   172 
   173 setup \<open>
   174 let
   175   val chars = map_range (Thm.cterm_of @{context} o HOLogic.mk_numeral o curry (op +) 1) 255;
   176   val simpset =
   177     put_simpset HOL_ss @{context}
   178       addsimps @{thms numeral_less_iff less_num_simps};
   179   fun mk_code_eqn ct =
   180     Thm.instantiate' [] [SOME ct] @{thm less_256_integer_of_char_Char}
   181     |> full_simplify simpset;
   182   val code_eqns = map mk_code_eqn chars;
   183 in
   184   Global_Theory.note_thmss ""
   185     [((@{binding integer_of_char_simps}, []), [(code_eqns, [])])]
   186   #> snd
   187 end
   188 \<close>
   189 
   190 declare integer_of_char_simps [code]
   191 
   192 lemma nat_of_char_code [code]:
   193   "nat_of_char = nat_of_integer \<circ> integer_of_char"
   194   by (simp add: integer_of_char_def fun_eq_iff)
   195 
   196 lemma char_of_nat_code [code]:
   197   "char_of_nat = char_of_integer \<circ> integer_of_nat"
   198   by (simp add: char_of_integer_def fun_eq_iff)
   199 
   200 instantiation char :: equal
   201 begin
   202 
   203 definition equal_char
   204   where "equal_char (c :: char) d \<longleftrightarrow> c = d"
   205 
   206 instance
   207   by standard (simp add: equal_char_def)
   208 
   209 end
   210 
   211 lemma equal_char_simps [code]:
   212   "HOL.equal (0::char) 0 \<longleftrightarrow> True"
   213   "HOL.equal (Char k) (Char l) \<longleftrightarrow> HOL.equal (numeral k mod 256 :: nat) (numeral l mod 256)"
   214   "HOL.equal 0 (Char k) \<longleftrightarrow> HOL.equal (numeral k mod 256 :: nat) 0"
   215   "HOL.equal (Char k) 0 \<longleftrightarrow> HOL.equal (numeral k mod 256 :: nat) 0"
   216   by (simp_all only: equal refl Char_eq_Char_iff zero_eq_Char_iff Char_eq_zero_iff)
   217 
   218 syntax
   219   "_Char" :: "str_position \<Rightarrow> char"    ("CHR _")
   220   "_Char_ord" :: "num_const \<Rightarrow> char"   ("CHR _")
   221 
   222 type_synonym string = "char list"
   223 
   224 syntax
   225   "_String" :: "str_position => string"    ("_")
   226 
   227 ML_file "Tools/string_syntax.ML"
   228 
   229 instantiation char :: enum
   230 begin
   231 
   232 definition
   233   "Enum.enum = [0, CHR 0x01, CHR 0x02, CHR 0x03,
   234     CHR 0x04, CHR 0x05, CHR 0x06, CHR 0x07,
   235     CHR 0x08, CHR 0x09, CHR ''\<newline>'', CHR 0x0B,
   236     CHR 0x0C, CHR 0x0D, CHR 0x0E, CHR 0x0F,
   237     CHR 0x10, CHR 0x11, CHR 0x12, CHR 0x13,
   238     CHR 0x14, CHR 0x15, CHR 0x16, CHR 0x17,
   239     CHR 0x18, CHR 0x19, CHR 0x1A, CHR 0x1B,
   240     CHR 0x1C, CHR 0x1D, CHR 0x1E, CHR 0x1F,
   241     CHR '' '', CHR ''!'', CHR 0x22, CHR ''#'',
   242     CHR ''$'', CHR ''%'', CHR ''&'', CHR 0x27,
   243     CHR ''('', CHR '')'', CHR ''*'', CHR ''+'',
   244     CHR '','', CHR ''-'', CHR ''.'', CHR ''/'',
   245     CHR ''0'', CHR ''1'', CHR ''2'', CHR ''3'',
   246     CHR ''4'', CHR ''5'', CHR ''6'', CHR ''7'',
   247     CHR ''8'', CHR ''9'', CHR '':'', CHR '';'',
   248     CHR ''<'', CHR ''='', CHR ''>'', CHR ''?'',
   249     CHR ''@'', CHR ''A'', CHR ''B'', CHR ''C'',
   250     CHR ''D'', CHR ''E'', CHR ''F'', CHR ''G'',
   251     CHR ''H'', CHR ''I'', CHR ''J'', CHR ''K'',
   252     CHR ''L'', CHR ''M'', CHR ''N'', CHR ''O'',
   253     CHR ''P'', CHR ''Q'', CHR ''R'', CHR ''S'',
   254     CHR ''T'', CHR ''U'', CHR ''V'', CHR ''W'',
   255     CHR ''X'', CHR ''Y'', CHR ''Z'', CHR ''['',
   256     CHR 0x5C, CHR '']'', CHR ''^'', CHR ''_'',
   257     CHR 0x60, CHR ''a'', CHR ''b'', CHR ''c'',
   258     CHR ''d'', CHR ''e'', CHR ''f'', CHR ''g'',
   259     CHR ''h'', CHR ''i'', CHR ''j'', CHR ''k'',
   260     CHR ''l'', CHR ''m'', CHR ''n'', CHR ''o'',
   261     CHR ''p'', CHR ''q'', CHR ''r'', CHR ''s'',
   262     CHR ''t'', CHR ''u'', CHR ''v'', CHR ''w'',
   263     CHR ''x'', CHR ''y'', CHR ''z'', CHR ''{'',
   264     CHR ''|'', CHR ''}'', CHR ''~'', CHR 0x7F,
   265     CHR 0x80, CHR 0x81, CHR 0x82, CHR 0x83,
   266     CHR 0x84, CHR 0x85, CHR 0x86, CHR 0x87,
   267     CHR 0x88, CHR 0x89, CHR 0x8A, CHR 0x8B,
   268     CHR 0x8C, CHR 0x8D, CHR 0x8E, CHR 0x8F,
   269     CHR 0x90, CHR 0x91, CHR 0x92, CHR 0x93,
   270     CHR 0x94, CHR 0x95, CHR 0x96, CHR 0x97,
   271     CHR 0x98, CHR 0x99, CHR 0x9A, CHR 0x9B,
   272     CHR 0x9C, CHR 0x9D, CHR 0x9E, CHR 0x9F,
   273     CHR 0xA0, CHR 0xA1, CHR 0xA2, CHR 0xA3,
   274     CHR 0xA4, CHR 0xA5, CHR 0xA6, CHR 0xA7,
   275     CHR 0xA8, CHR 0xA9, CHR 0xAA, CHR 0xAB,
   276     CHR 0xAC, CHR 0xAD, CHR 0xAE, CHR 0xAF,
   277     CHR 0xB0, CHR 0xB1, CHR 0xB2, CHR 0xB3,
   278     CHR 0xB4, CHR 0xB5, CHR 0xB6, CHR 0xB7,
   279     CHR 0xB8, CHR 0xB9, CHR 0xBA, CHR 0xBB,
   280     CHR 0xBC, CHR 0xBD, CHR 0xBE, CHR 0xBF,
   281     CHR 0xC0, CHR 0xC1, CHR 0xC2, CHR 0xC3,
   282     CHR 0xC4, CHR 0xC5, CHR 0xC6, CHR 0xC7,
   283     CHR 0xC8, CHR 0xC9, CHR 0xCA, CHR 0xCB,
   284     CHR 0xCC, CHR 0xCD, CHR 0xCE, CHR 0xCF,
   285     CHR 0xD0, CHR 0xD1, CHR 0xD2, CHR 0xD3,
   286     CHR 0xD4, CHR 0xD5, CHR 0xD6, CHR 0xD7,
   287     CHR 0xD8, CHR 0xD9, CHR 0xDA, CHR 0xDB,
   288     CHR 0xDC, CHR 0xDD, CHR 0xDE, CHR 0xDF,
   289     CHR 0xE0, CHR 0xE1, CHR 0xE2, CHR 0xE3,
   290     CHR 0xE4, CHR 0xE5, CHR 0xE6, CHR 0xE7,
   291     CHR 0xE8, CHR 0xE9, CHR 0xEA, CHR 0xEB,
   292     CHR 0xEC, CHR 0xED, CHR 0xEE, CHR 0xEF,
   293     CHR 0xF0, CHR 0xF1, CHR 0xF2, CHR 0xF3,
   294     CHR 0xF4, CHR 0xF5, CHR 0xF6, CHR 0xF7,
   295     CHR 0xF8, CHR 0xF9, CHR 0xFA, CHR 0xFB,
   296     CHR 0xFC, CHR 0xFD, CHR 0xFE, CHR 0xFF]"
   297 
   298 definition
   299   "Enum.enum_all P \<longleftrightarrow> list_all P (Enum.enum :: char list)"
   300 
   301 definition
   302   "Enum.enum_ex P \<longleftrightarrow> list_ex P (Enum.enum :: char list)"
   303 
   304 lemma enum_char_unfold:
   305   "Enum.enum = map char_of_nat [0..<256]"
   306 proof -
   307   have *: "Suc (Suc 0) = 2" by simp
   308   have "map nat_of_char Enum.enum = [0..<256]"
   309     by (simp add: enum_char_def upt_conv_Cons_Cons *)
   310   then have "map char_of_nat (map nat_of_char Enum.enum) =
   311     map char_of_nat [0..<256]" by simp
   312   then show ?thesis by (simp add: comp_def)
   313 qed
   314 
   315 instance proof
   316   show UNIV: "UNIV = set (Enum.enum :: char list)"
   317     by (simp add: enum_char_unfold UNIV_char_of_nat atLeast0LessThan)
   318   show "distinct (Enum.enum :: char list)"
   319     by (auto simp add: enum_char_unfold distinct_map intro: inj_onI)
   320   show "\<And>P. Enum.enum_all P \<longleftrightarrow> Ball (UNIV :: char set) P"
   321     by (simp add: UNIV enum_all_char_def list_all_iff)
   322   show "\<And>P. Enum.enum_ex P \<longleftrightarrow> Bex (UNIV :: char set) P"
   323     by (simp add: UNIV enum_ex_char_def list_ex_iff)
   324 qed
   325 
   326 end
   327 
   328 lemma char_of_integer_code [code]:
   329   "char_of_integer n = Enum.enum ! (nat_of_integer n mod 256)"
   330   by (simp add: char_of_integer_def enum_char_unfold)
   331 
   332 
   333 subsection \<open>Strings as dedicated type\<close>
   334 
   335 typedef literal = "UNIV :: string set"
   336   morphisms explode STR ..
   337 
   338 setup_lifting type_definition_literal
   339 
   340 lemma STR_inject' [simp]:
   341   "STR s = STR t \<longleftrightarrow> s = t"
   342   by transfer rule
   343 
   344 definition implode :: "string \<Rightarrow> String.literal"
   345 where
   346   [code del]: "implode = STR"
   347   
   348 instantiation literal :: size
   349 begin
   350 
   351 definition size_literal :: "literal \<Rightarrow> nat"
   352 where
   353   [code]: "size_literal (s::literal) = 0"
   354 
   355 instance ..
   356 
   357 end
   358 
   359 instantiation literal :: equal
   360 begin
   361 
   362 lift_definition equal_literal :: "literal \<Rightarrow> literal \<Rightarrow> bool" is "op =" .
   363 
   364 instance by intro_classes (transfer, simp)
   365 
   366 end
   367 
   368 declare equal_literal.rep_eq[code]
   369 
   370 lemma [code nbe]:
   371   fixes s :: "String.literal"
   372   shows "HOL.equal s s \<longleftrightarrow> True"
   373   by (simp add: equal)
   374 
   375 lifting_update literal.lifting
   376 lifting_forget literal.lifting
   377 
   378   
   379 subsection \<open>Dedicated conversion for generated computations\<close>
   380 
   381 definition char_of_num :: "num \<Rightarrow> char"
   382   where "char_of_num = char_of_nat o nat_of_num"
   383 
   384 lemma [code_computation_unfold]:
   385   "Char = char_of_num"
   386   by (simp add: fun_eq_iff char_of_num_def nat_of_num_numeral Char_def)
   387 
   388 
   389 subsection \<open>Code generator\<close>
   390 
   391 ML_file "Tools/string_code.ML"
   392 
   393 code_reserved SML string
   394 code_reserved OCaml string
   395 code_reserved Scala string
   396 
   397 code_printing
   398   type_constructor literal \<rightharpoonup>
   399     (SML) "string"
   400     and (OCaml) "string"
   401     and (Haskell) "String"
   402     and (Scala) "String"
   403 
   404 setup \<open>
   405   fold String_Code.add_literal_string ["SML", "OCaml", "Haskell", "Scala"]
   406 \<close>
   407 
   408 code_printing
   409   class_instance literal :: equal \<rightharpoonup>
   410     (Haskell) -
   411 | constant "HOL.equal :: literal \<Rightarrow> literal \<Rightarrow> bool" \<rightharpoonup>
   412     (SML) "!((_ : string) = _)"
   413     and (OCaml) "!((_ : string) = _)"
   414     and (Haskell) infix 4 "=="
   415     and (Scala) infixl 5 "=="
   416 
   417 setup \<open>Sign.map_naming (Name_Space.mandatory_path "Code")\<close>
   418 
   419 definition abort :: "literal \<Rightarrow> (unit \<Rightarrow> 'a) \<Rightarrow> 'a"
   420 where [simp, code del]: "abort _ f = f ()"
   421 
   422 lemma abort_cong: "msg = msg' ==> Code.abort msg f = Code.abort msg' f"
   423 by simp
   424 
   425 setup \<open>Sign.map_naming Name_Space.parent_path\<close>
   426 
   427 setup \<open>Code_Simp.map_ss (Simplifier.add_cong @{thm Code.abort_cong})\<close>
   428 
   429 code_printing constant Code.abort \<rightharpoonup>
   430     (SML) "!(raise/ Fail/ _)"
   431     and (OCaml) "failwith"
   432     and (Haskell) "!(error/ ::/ forall a./ String -> (() -> a) -> a)"
   433     and (Scala) "!{/ sys.error((_));/  ((_)).apply(())/ }"
   434 
   435 hide_type (open) literal
   436 
   437 hide_const (open) implode explode
   438 
   439 end