src/HOL/String.thy
author haftmann
Sat Mar 12 22:04:52 2016 +0100 (2016-03-12)
changeset 62597 b3f2b8c906a6
parent 62580 7011429f44f9
child 62678 843ff6f1de38
permissions -rw-r--r--
model characters directly as range 0..255
* * *
operate on syntax terms rather than asts
     1 (* Author: Tobias Nipkow, Florian Haftmann, TU Muenchen *)
     2 
     3 section \<open>Character and string types\<close>
     4 
     5 theory String
     6 imports Enum
     7 begin
     8 
     9 subsection \<open>Characters and strings\<close>
    10 
    11 subsubsection \<open>Characters as finite algebraic type\<close>
    12 
    13 typedef char = "{n::nat. n < 256}"
    14   morphisms nat_of_char Abs_char
    15 proof
    16   show "Suc 0 \<in> {n. n < 256}" by simp
    17 qed
    18 
    19 definition char_of_nat :: "nat \<Rightarrow> char"
    20 where
    21   "char_of_nat n = Abs_char (n mod 256)"
    22 
    23 lemma char_cases [case_names char_of_nat, cases type: char]:
    24   "(\<And>n. c = char_of_nat n \<Longrightarrow> n < 256 \<Longrightarrow> P) \<Longrightarrow> P"
    25   by (cases c) (simp add: char_of_nat_def)
    26 
    27 lemma char_of_nat_of_char [simp]:
    28   "char_of_nat (nat_of_char c) = c"
    29   by (cases c) (simp add: char_of_nat_def Abs_char_inject Abs_char_inverse)
    30 
    31 lemma inj_nat_of_char:
    32   "inj nat_of_char"
    33 proof (rule injI)
    34   fix c d
    35   assume "nat_of_char c = nat_of_char d"
    36   then have "char_of_nat (nat_of_char c) = char_of_nat (nat_of_char d)"
    37     by simp
    38   then show "c = d"
    39     by simp
    40 qed
    41   
    42 lemma nat_of_char_eq_iff [simp]:
    43   "nat_of_char c = nat_of_char d \<longleftrightarrow> c = d"
    44   by (fact nat_of_char_inject)
    45 
    46 lemma nat_of_char_of_nat [simp]:
    47   "nat_of_char (char_of_nat n) = n mod 256"
    48   by (cases "char_of_nat n") (simp add: char_of_nat_def Abs_char_inject Abs_char_inverse)
    49 
    50 lemma char_of_nat_mod_256 [simp]:
    51   "char_of_nat (n mod 256) = char_of_nat n"
    52   by (cases "char_of_nat (n mod 256)") (simp add: char_of_nat_def)
    53 
    54 lemma char_of_nat_quasi_inj [simp]:
    55   "char_of_nat m = char_of_nat n \<longleftrightarrow> m mod 256 = n mod 256"
    56   by (simp add: char_of_nat_def Abs_char_inject)
    57 
    58 lemma inj_on_char_of_nat [simp]:
    59   "inj_on char_of_nat {..<256}"
    60   by (rule inj_onI) simp
    61 
    62 lemma nat_of_char_mod_256 [simp]:
    63   "nat_of_char c mod 256 = nat_of_char c"
    64   by (cases c) simp
    65 
    66 lemma nat_of_char_less_256 [simp]:
    67   "nat_of_char c < 256"
    68 proof -
    69   have "nat_of_char c mod 256 < 256"
    70     by arith
    71   then show ?thesis by simp
    72 qed
    73 
    74 lemma UNIV_char_of_nat:
    75   "UNIV = char_of_nat ` {..<256}"
    76 proof -
    77   { fix c
    78     have "c \<in> char_of_nat ` {..<256}"
    79       by (cases c) auto
    80   } then show ?thesis by auto
    81 qed
    82 
    83 lemma card_UNIV_char:
    84   "card (UNIV :: char set) = 256"
    85   by (auto simp add: UNIV_char_of_nat card_image)
    86 
    87 lemma range_nat_of_char:
    88   "range nat_of_char = {..<256}"
    89   by (auto simp add: UNIV_char_of_nat image_image image_def)
    90 
    91 
    92 subsubsection \<open>Character literals as variant of numerals\<close>
    93 
    94 instantiation char :: zero
    95 begin
    96 
    97 definition zero_char :: char
    98   where "0 = char_of_nat 0"
    99 
   100 instance ..
   101 
   102 end
   103 
   104 definition Char :: "num \<Rightarrow> char"
   105   where "Char k = char_of_nat (numeral k)"
   106 
   107 code_datatype "0 :: char" Char
   108 
   109 lemma nat_of_char_zero [simp]:
   110   "nat_of_char 0 = 0"
   111   by (simp add: zero_char_def)
   112 
   113 lemma nat_of_char_Char [simp]:
   114   "nat_of_char (Char k) = numeral k mod 256"
   115   by (simp add: Char_def)
   116 
   117 lemma Char_eq_Char_iff [simp]:
   118   "Char k = Char l \<longleftrightarrow> numeral k mod (256 :: nat) = numeral l mod 256" (is "?P \<longleftrightarrow> ?Q")
   119 proof -
   120   have "?P \<longleftrightarrow> nat_of_char (Char k) = nat_of_char (Char l)"
   121     by (rule sym, rule inj_eq) (fact inj_nat_of_char)
   122   also have "nat_of_char (Char k) = nat_of_char (Char l) \<longleftrightarrow> ?Q"
   123     by (simp only: Char_def nat_of_char_of_nat)
   124   finally show ?thesis .
   125 qed
   126 
   127 lemma zero_eq_Char_iff [simp]:
   128   "0 = Char k \<longleftrightarrow> numeral k mod (256 :: nat) = 0"
   129   by (auto simp add: zero_char_def Char_def)
   130 
   131 lemma Char_eq_zero_iff [simp]:
   132   "Char k = 0 \<longleftrightarrow> numeral k mod (256 :: nat) = 0"
   133   by (auto simp add: zero_char_def Char_def) 
   134 
   135 definition integer_of_char :: "char \<Rightarrow> integer"
   136 where
   137   "integer_of_char = integer_of_nat \<circ> nat_of_char"
   138 
   139 definition char_of_integer :: "integer \<Rightarrow> char"
   140 where
   141   "char_of_integer = char_of_nat \<circ> nat_of_integer"
   142 
   143 lemma integer_of_char_zero [simp, code]:
   144   "integer_of_char 0 = 0"
   145   by (simp add: integer_of_char_def integer_of_nat_def)
   146 
   147 lemma integer_of_char_Char [simp]:
   148   "integer_of_char (Char k) = numeral k mod 256"
   149   by (simp only: integer_of_char_def integer_of_nat_def comp_apply nat_of_char_Char map_fun_def
   150     id_apply zmod_int mod_integer.abs_eq [symmetric]) simp
   151 
   152 lemma less_256_integer_of_char_Char:
   153   assumes "numeral k < (256 :: integer)"
   154   shows "integer_of_char (Char k) = numeral k"
   155 proof -
   156   have "numeral k mod 256 = (numeral k :: integer)"
   157     by (rule semiring_numeral_div_class.mod_less) (insert assms, simp_all)
   158   then show ?thesis using integer_of_char_Char [of k]
   159     by (simp only:)
   160 qed
   161 
   162 setup \<open>
   163 let
   164   val chars = map_range (Thm.cterm_of @{context} o HOLogic.mk_numeral o curry (op +) 1) 255;
   165   val simpset =
   166     put_simpset HOL_ss @{context}
   167       addsimps @{thms numeral_less_iff less_num_simps};
   168   fun mk_code_eqn ct =
   169     Thm.instantiate' [] [SOME ct] @{thm less_256_integer_of_char_Char}
   170     |> full_simplify simpset;
   171   val code_eqns = map mk_code_eqn chars;
   172 in
   173   Global_Theory.note_thmss ""
   174     [((@{binding integer_of_char_simps}, []), [(code_eqns, [])])]
   175   #> snd
   176 end
   177 \<close>
   178 
   179 declare integer_of_char_simps [code]
   180 
   181 lemma nat_of_char_code [code]:
   182   "nat_of_char = nat_of_integer \<circ> integer_of_char"
   183   by (simp add: integer_of_char_def fun_eq_iff)
   184 
   185 lemma char_of_nat_code [code]:
   186   "char_of_nat = char_of_integer \<circ> integer_of_nat"
   187   by (simp add: char_of_integer_def fun_eq_iff)
   188 
   189 instantiation char :: equal
   190 begin
   191 
   192 definition equal_char
   193   where "equal_char (c :: char) d \<longleftrightarrow> c = d"
   194 
   195 instance
   196   by standard (simp add: equal_char_def)
   197 
   198 end
   199 
   200 lemma equal_char_simps [code]:
   201   "HOL.equal (0::char) 0 \<longleftrightarrow> True"
   202   "HOL.equal (Char k) (Char l) \<longleftrightarrow> HOL.equal (numeral k mod 256 :: nat) (numeral l mod 256)"
   203   "HOL.equal 0 (Char k) \<longleftrightarrow> HOL.equal (numeral k mod 256 :: nat) 0"
   204   "HOL.equal (Char k) 0 \<longleftrightarrow> HOL.equal (numeral k mod 256 :: nat) 0"
   205   by (simp_all only: equal refl Char_eq_Char_iff zero_eq_Char_iff Char_eq_zero_iff)
   206 
   207 syntax
   208   "_Char" :: "str_position \<Rightarrow> char"    ("CHR _")
   209 
   210 syntax
   211   "_Char_ord" :: "num_const \<Rightarrow> char"     ("CHAR _")
   212 
   213 type_synonym string = "char list"
   214 
   215 syntax
   216   "_String" :: "str_position => string"    ("_")
   217 
   218 ML_file "Tools/string_syntax.ML"
   219 
   220 instantiation char :: enum
   221 begin
   222 
   223 definition
   224   "Enum.enum = [0, CHAR 0x01, CHAR 0x02, CHAR 0x03,
   225     CHAR 0x04, CHAR 0x05, CHAR 0x06, CHAR 0x07,
   226     CHAR 0x08, CHAR 0x09, CHR ''\<newline>'', CHAR 0x0B,
   227     CHAR 0x0C, CHAR 0x0D, CHAR 0x0E, CHAR 0x0F,
   228     CHAR 0x10, CHAR 0x11, CHAR 0x12, CHAR 0x13,
   229     CHAR 0x14, CHAR 0x15, CHAR 0x16, CHAR 0x17,
   230     CHAR 0x18, CHAR 0x19, CHAR 0x1A, CHAR 0x1B,
   231     CHAR 0x1C, CHAR 0x1D, CHAR 0x1E, CHAR 0x1F,
   232     CHR '' '', CHR ''!'', CHAR 0x22, CHR ''#'',
   233     CHR ''$'', CHR ''%'', CHR ''&'', CHAR 0x27,
   234     CHR ''('', CHR '')'', CHR ''*'', CHR ''+'',
   235     CHR '','', CHR ''-'', CHR ''.'', CHR ''/'',
   236     CHR ''0'', CHR ''1'', CHR ''2'', CHR ''3'',
   237     CHR ''4'', CHR ''5'', CHR ''6'', CHR ''7'',
   238     CHR ''8'', CHR ''9'', CHR '':'', CHR '';'',
   239     CHR ''<'', CHR ''='', CHR ''>'', CHR ''?'',
   240     CHR ''@'', CHR ''A'', CHR ''B'', CHR ''C'',
   241     CHR ''D'', CHR ''E'', CHR ''F'', CHR ''G'',
   242     CHR ''H'', CHR ''I'', CHR ''J'', CHR ''K'',
   243     CHR ''L'', CHR ''M'', CHR ''N'', CHR ''O'',
   244     CHR ''P'', CHR ''Q'', CHR ''R'', CHR ''S'',
   245     CHR ''T'', CHR ''U'', CHR ''V'', CHR ''W'',
   246     CHR ''X'', CHR ''Y'', CHR ''Z'', CHR ''['',
   247     CHAR 0x5C, CHR '']'', CHR ''^'', CHR ''_'',
   248     CHAR 0x60, CHR ''a'', CHR ''b'', CHR ''c'',
   249     CHR ''d'', CHR ''e'', CHR ''f'', CHR ''g'',
   250     CHR ''h'', CHR ''i'', CHR ''j'', CHR ''k'',
   251     CHR ''l'', CHR ''m'', CHR ''n'', CHR ''o'',
   252     CHR ''p'', CHR ''q'', CHR ''r'', CHR ''s'',
   253     CHR ''t'', CHR ''u'', CHR ''v'', CHR ''w'',
   254     CHR ''x'', CHR ''y'', CHR ''z'', CHR ''{'',
   255     CHR ''|'', CHR ''}'', CHR ''~'', CHAR 0x7F,
   256     CHAR 0x80, CHAR 0x81, CHAR 0x82, CHAR 0x83,
   257     CHAR 0x84, CHAR 0x85, CHAR 0x86, CHAR 0x87,
   258     CHAR 0x88, CHAR 0x89, CHAR 0x8A, CHAR 0x8B,
   259     CHAR 0x8C, CHAR 0x8D, CHAR 0x8E, CHAR 0x8F,
   260     CHAR 0x90, CHAR 0x91, CHAR 0x92, CHAR 0x93,
   261     CHAR 0x94, CHAR 0x95, CHAR 0x96, CHAR 0x97,
   262     CHAR 0x98, CHAR 0x99, CHAR 0x9A, CHAR 0x9B,
   263     CHAR 0x9C, CHAR 0x9D, CHAR 0x9E, CHAR 0x9F,
   264     CHAR 0xA0, CHAR 0xA1, CHAR 0xA2, CHAR 0xA3,
   265     CHAR 0xA4, CHAR 0xA5, CHAR 0xA6, CHAR 0xA7,
   266     CHAR 0xA8, CHAR 0xA9, CHAR 0xAA, CHAR 0xAB,
   267     CHAR 0xAC, CHAR 0xAD, CHAR 0xAE, CHAR 0xAF,
   268     CHAR 0xB0, CHAR 0xB1, CHAR 0xB2, CHAR 0xB3,
   269     CHAR 0xB4, CHAR 0xB5, CHAR 0xB6, CHAR 0xB7,
   270     CHAR 0xB8, CHAR 0xB9, CHAR 0xBA, CHAR 0xBB,
   271     CHAR 0xBC, CHAR 0xBD, CHAR 0xBE, CHAR 0xBF,
   272     CHAR 0xC0, CHAR 0xC1, CHAR 0xC2, CHAR 0xC3,
   273     CHAR 0xC4, CHAR 0xC5, CHAR 0xC6, CHAR 0xC7,
   274     CHAR 0xC8, CHAR 0xC9, CHAR 0xCA, CHAR 0xCB,
   275     CHAR 0xCC, CHAR 0xCD, CHAR 0xCE, CHAR 0xCF,
   276     CHAR 0xD0, CHAR 0xD1, CHAR 0xD2, CHAR 0xD3,
   277     CHAR 0xD4, CHAR 0xD5, CHAR 0xD6, CHAR 0xD7,
   278     CHAR 0xD8, CHAR 0xD9, CHAR 0xDA, CHAR 0xDB,
   279     CHAR 0xDC, CHAR 0xDD, CHAR 0xDE, CHAR 0xDF,
   280     CHAR 0xE0, CHAR 0xE1, CHAR 0xE2, CHAR 0xE3,
   281     CHAR 0xE4, CHAR 0xE5, CHAR 0xE6, CHAR 0xE7,
   282     CHAR 0xE8, CHAR 0xE9, CHAR 0xEA, CHAR 0xEB,
   283     CHAR 0xEC, CHAR 0xED, CHAR 0xEE, CHAR 0xEF,
   284     CHAR 0xF0, CHAR 0xF1, CHAR 0xF2, CHAR 0xF3,
   285     CHAR 0xF4, CHAR 0xF5, CHAR 0xF6, CHAR 0xF7,
   286     CHAR 0xF8, CHAR 0xF9, CHAR 0xFA, CHAR 0xFB,
   287     CHAR 0xFC, CHAR 0xFD, CHAR 0xFE, CHAR 0xFF]"
   288 
   289 definition
   290   "Enum.enum_all P \<longleftrightarrow> list_all P (Enum.enum :: char list)"
   291 
   292 definition
   293   "Enum.enum_ex P \<longleftrightarrow> list_ex P (Enum.enum :: char list)"
   294 
   295 lemma enum_char_unfold:
   296   "Enum.enum = map char_of_nat [0..<256]"
   297 proof -
   298   have *: "Suc (Suc 0) = 2" by simp
   299   have "map nat_of_char Enum.enum = [0..<256]"
   300     by (simp add: enum_char_def upt_conv_Cons_Cons *)
   301   then have "map char_of_nat (map nat_of_char Enum.enum) =
   302     map char_of_nat [0..<256]" by simp
   303   then show ?thesis by (simp add: comp_def)
   304 qed
   305 
   306 instance proof
   307   show UNIV: "UNIV = set (Enum.enum :: char list)"
   308     by (simp add: enum_char_unfold UNIV_char_of_nat atLeast0LessThan)
   309   show "distinct (Enum.enum :: char list)"
   310     by (auto simp add: enum_char_unfold distinct_map intro: inj_onI)
   311   show "\<And>P. Enum.enum_all P \<longleftrightarrow> Ball (UNIV :: char set) P"
   312     by (simp add: UNIV enum_all_char_def list_all_iff)
   313   show "\<And>P. Enum.enum_ex P \<longleftrightarrow> Bex (UNIV :: char set) P"
   314     by (simp add: UNIV enum_ex_char_def list_ex_iff)
   315 qed
   316 
   317 end
   318 
   319 lemma char_of_integer_code [code]:
   320   "char_of_integer n = Enum.enum ! (nat_of_integer n mod 256)"
   321   by (simp add: char_of_integer_def enum_char_unfold)
   322 
   323 
   324 subsection \<open>Strings as dedicated type\<close>
   325 
   326 typedef literal = "UNIV :: string set"
   327   morphisms explode STR ..
   328 
   329 setup_lifting type_definition_literal
   330 
   331 lemma STR_inject' [simp]:
   332   "STR s = STR t \<longleftrightarrow> s = t"
   333   by transfer rule
   334 
   335 definition implode :: "string \<Rightarrow> String.literal"
   336 where
   337   [code del]: "implode = STR"
   338   
   339 instantiation literal :: size
   340 begin
   341 
   342 definition size_literal :: "literal \<Rightarrow> nat"
   343 where
   344   [code]: "size_literal (s::literal) = 0"
   345 
   346 instance ..
   347 
   348 end
   349 
   350 instantiation literal :: equal
   351 begin
   352 
   353 lift_definition equal_literal :: "literal \<Rightarrow> literal \<Rightarrow> bool" is "op =" .
   354 
   355 instance by intro_classes (transfer, simp)
   356 
   357 end
   358 
   359 declare equal_literal.rep_eq[code]
   360 
   361 lemma [code nbe]:
   362   fixes s :: "String.literal"
   363   shows "HOL.equal s s \<longleftrightarrow> True"
   364   by (simp add: equal)
   365 
   366 lifting_update literal.lifting
   367 lifting_forget literal.lifting
   368 
   369 subsection \<open>Code generator\<close>
   370 
   371 ML_file "Tools/string_code.ML"
   372 
   373 code_reserved SML string
   374 code_reserved OCaml string
   375 code_reserved Scala string
   376 
   377 code_printing
   378   type_constructor literal \<rightharpoonup>
   379     (SML) "string"
   380     and (OCaml) "string"
   381     and (Haskell) "String"
   382     and (Scala) "String"
   383 
   384 setup \<open>
   385   fold String_Code.add_literal_string ["SML", "OCaml", "Haskell", "Scala"]
   386 \<close>
   387 
   388 code_printing
   389   class_instance literal :: equal \<rightharpoonup>
   390     (Haskell) -
   391 | constant "HOL.equal :: literal \<Rightarrow> literal \<Rightarrow> bool" \<rightharpoonup>
   392     (SML) "!((_ : string) = _)"
   393     and (OCaml) "!((_ : string) = _)"
   394     and (Haskell) infix 4 "=="
   395     and (Scala) infixl 5 "=="
   396 
   397 setup \<open>Sign.map_naming (Name_Space.mandatory_path "Code")\<close>
   398 
   399 definition abort :: "literal \<Rightarrow> (unit \<Rightarrow> 'a) \<Rightarrow> 'a"
   400 where [simp, code del]: "abort _ f = f ()"
   401 
   402 lemma abort_cong: "msg = msg' ==> Code.abort msg f = Code.abort msg' f"
   403 by simp
   404 
   405 setup \<open>Sign.map_naming Name_Space.parent_path\<close>
   406 
   407 setup \<open>Code_Simp.map_ss (Simplifier.add_cong @{thm Code.abort_cong})\<close>
   408 
   409 code_printing constant Code.abort \<rightharpoonup>
   410     (SML) "!(raise/ Fail/ _)"
   411     and (OCaml) "failwith"
   412     and (Haskell) "!(error/ ::/ forall a./ String -> (() -> a) -> a)"
   413     and (Scala) "!{/ sys.error((_));/  ((_)).apply(())/ }"
   414 
   415 hide_type (open) literal
   416 
   417 hide_const (open) implode explode
   418 
   419 end