src/HOL/String.thy
author wenzelm
Sun Nov 26 21:08:32 2017 +0100 (22 months ago)
changeset 67091 1393c2340eec
parent 66331 f773691617c0
child 67399 eab6ce8368fa
permissions -rw-r--r--
more symbols;
     1 (* Author: Tobias Nipkow, Florian Haftmann, TU Muenchen *)
     2 
     3 section \<open>Character and string types\<close>
     4 
     5 theory String
     6 imports Enum
     7 begin
     8 
     9 subsection \<open>Characters and strings\<close>
    10 
    11 subsubsection \<open>Characters as finite algebraic type\<close>
    12 
    13 typedef char = "{n::nat. n < 256}"
    14   morphisms nat_of_char Abs_char
    15 proof
    16   show "Suc 0 \<in> {n. n < 256}" by simp
    17 qed
    18 
    19 setup_lifting type_definition_char
    20 
    21 definition char_of_nat :: "nat \<Rightarrow> char"
    22 where
    23   "char_of_nat n = Abs_char (n mod 256)"
    24 
    25 lemma char_cases [case_names char_of_nat, cases type: char]:
    26   "(\<And>n. c = char_of_nat n \<Longrightarrow> n < 256 \<Longrightarrow> P) \<Longrightarrow> P"
    27   by (cases c) (simp add: char_of_nat_def)
    28 
    29 lemma char_of_nat_of_char [simp]:
    30   "char_of_nat (nat_of_char c) = c"
    31   by (cases c) (simp add: char_of_nat_def Abs_char_inject Abs_char_inverse)
    32 
    33 lemma inj_nat_of_char:
    34   "inj nat_of_char"
    35 proof (rule injI)
    36   fix c d
    37   assume "nat_of_char c = nat_of_char d"
    38   then have "char_of_nat (nat_of_char c) = char_of_nat (nat_of_char d)"
    39     by simp
    40   then show "c = d"
    41     by simp
    42 qed
    43   
    44 lemma nat_of_char_eq_iff [simp]:
    45   "nat_of_char c = nat_of_char d \<longleftrightarrow> c = d"
    46   by (fact nat_of_char_inject)
    47 
    48 lemma nat_of_char_of_nat [simp]:
    49   "nat_of_char (char_of_nat n) = n mod 256"
    50   by (cases "char_of_nat n") (simp add: char_of_nat_def Abs_char_inject Abs_char_inverse)
    51 
    52 lemma char_of_nat_mod_256 [simp]:
    53   "char_of_nat (n mod 256) = char_of_nat n"
    54   by (cases "char_of_nat (n mod 256)") (simp add: char_of_nat_def)
    55 
    56 lemma char_of_nat_quasi_inj [simp]:
    57   "char_of_nat m = char_of_nat n \<longleftrightarrow> m mod 256 = n mod 256"
    58   by (simp add: char_of_nat_def Abs_char_inject)
    59 
    60 lemma inj_on_char_of_nat [simp]:
    61   "inj_on char_of_nat {..<256}"
    62   by (rule inj_onI) simp
    63 
    64 lemma nat_of_char_mod_256 [simp]:
    65   "nat_of_char c mod 256 = nat_of_char c"
    66   by (cases c) simp
    67 
    68 lemma nat_of_char_less_256 [simp]:
    69   "nat_of_char c < 256"
    70 proof -
    71   have "nat_of_char c mod 256 < 256"
    72     by arith
    73   then show ?thesis by simp
    74 qed
    75 
    76 lemma UNIV_char_of_nat:
    77   "UNIV = char_of_nat ` {..<256}"
    78 proof -
    79   { fix c
    80     have "c \<in> char_of_nat ` {..<256}"
    81       by (cases c) auto
    82   } then show ?thesis by auto
    83 qed
    84 
    85 lemma card_UNIV_char:
    86   "card (UNIV :: char set) = 256"
    87   by (auto simp add: UNIV_char_of_nat card_image)
    88 
    89 lemma range_nat_of_char:
    90   "range nat_of_char = {..<256}"
    91   by (auto simp add: UNIV_char_of_nat image_image image_def)
    92 
    93 
    94 subsubsection \<open>Character literals as variant of numerals\<close>
    95 
    96 instantiation char :: zero
    97 begin
    98 
    99 definition zero_char :: char
   100   where "0 = char_of_nat 0"
   101 
   102 instance ..
   103 
   104 end
   105 
   106 definition Char :: "num \<Rightarrow> char"
   107   where "Char k = char_of_nat (numeral k)"
   108 
   109 code_datatype "0 :: char" Char
   110 
   111 lemma nat_of_char_zero [simp]:
   112   "nat_of_char 0 = 0"
   113   by (simp add: zero_char_def)
   114 
   115 lemma nat_of_char_Char [simp]:
   116   "nat_of_char (Char k) = numeral k mod 256"
   117   by (simp add: Char_def)
   118 
   119 lemma Char_eq_Char_iff:
   120   "Char k = Char l \<longleftrightarrow> numeral k mod (256 :: nat) = numeral l mod 256" (is "?P \<longleftrightarrow> ?Q")
   121 proof -
   122   have "?P \<longleftrightarrow> nat_of_char (Char k) = nat_of_char (Char l)"
   123     by (rule sym, rule inj_eq) (fact inj_nat_of_char)
   124   also have "nat_of_char (Char k) = nat_of_char (Char l) \<longleftrightarrow> ?Q"
   125     by (simp only: Char_def nat_of_char_of_nat)
   126   finally show ?thesis .
   127 qed
   128 
   129 lemma zero_eq_Char_iff:
   130   "0 = Char k \<longleftrightarrow> numeral k mod (256 :: nat) = 0"
   131   by (auto simp add: zero_char_def Char_def)
   132 
   133 lemma Char_eq_zero_iff:
   134   "Char k = 0 \<longleftrightarrow> numeral k mod (256 :: nat) = 0"
   135   by (auto simp add: zero_char_def Char_def) 
   136 
   137 simproc_setup char_eq
   138   ("Char m = Char n" | "Char m = 0" | "0 = Char n") = \<open>
   139   let
   140     val ss = put_simpset HOL_ss @{context}
   141       |> fold Simplifier.add_simp @{thms Char_eq_Char_iff zero_eq_Char_iff Char_eq_zero_iff cong_exp_iff_simps}
   142       |> simpset_of 
   143   in
   144     fn _ => fn ctxt => SOME o Simplifier.rewrite (put_simpset ss ctxt)
   145   end
   146 \<close>
   147 
   148 definition integer_of_char :: "char \<Rightarrow> integer"
   149 where
   150   "integer_of_char = integer_of_nat \<circ> nat_of_char"
   151 
   152 definition char_of_integer :: "integer \<Rightarrow> char"
   153 where
   154   "char_of_integer = char_of_nat \<circ> nat_of_integer"
   155 
   156 lemma integer_of_char_zero [simp, code]:
   157   "integer_of_char 0 = 0"
   158   by (simp add: integer_of_char_def integer_of_nat_def)
   159 
   160 lemma integer_of_char_Char [simp]:
   161   "integer_of_char (Char k) = numeral k mod 256"
   162   by (simp only: integer_of_char_def integer_of_nat_def comp_apply nat_of_char_Char map_fun_def
   163     id_apply zmod_int modulo_integer.abs_eq [symmetric]) simp
   164 
   165 lemma integer_of_char_Char_code [code]:
   166   "integer_of_char (Char k) = integer_of_num k mod 256"
   167   by simp
   168   
   169 lemma nat_of_char_code [code]:
   170   "nat_of_char = nat_of_integer \<circ> integer_of_char"
   171   by (simp add: integer_of_char_def fun_eq_iff)
   172 
   173 lemma char_of_nat_code [code]:
   174   "char_of_nat = char_of_integer \<circ> integer_of_nat"
   175   by (simp add: char_of_integer_def fun_eq_iff)
   176 
   177 instantiation char :: equal
   178 begin
   179 
   180 definition equal_char
   181   where "equal_char (c :: char) d \<longleftrightarrow> c = d"
   182 
   183 instance
   184   by standard (simp add: equal_char_def)
   185 
   186 end
   187 
   188 lemma equal_char_simps [code]:
   189   "HOL.equal (0::char) 0 \<longleftrightarrow> True"
   190   "HOL.equal (Char k) (Char l) \<longleftrightarrow> HOL.equal (numeral k mod 256 :: nat) (numeral l mod 256)"
   191   "HOL.equal 0 (Char k) \<longleftrightarrow> HOL.equal (numeral k mod 256 :: nat) 0"
   192   "HOL.equal (Char k) 0 \<longleftrightarrow> HOL.equal (numeral k mod 256 :: nat) 0"
   193   by (simp_all only: equal refl Char_eq_Char_iff zero_eq_Char_iff Char_eq_zero_iff)
   194 
   195 syntax
   196   "_Char" :: "str_position \<Rightarrow> char"    ("CHR _")
   197   "_Char_ord" :: "num_const \<Rightarrow> char"   ("CHR _")
   198 
   199 type_synonym string = "char list"
   200 
   201 syntax
   202   "_String" :: "str_position => string"    ("_")
   203 
   204 ML_file "Tools/string_syntax.ML"
   205 
   206 instantiation char :: enum
   207 begin
   208 
   209 definition
   210   "Enum.enum = [0, CHR 0x01, CHR 0x02, CHR 0x03,
   211     CHR 0x04, CHR 0x05, CHR 0x06, CHR 0x07,
   212     CHR 0x08, CHR 0x09, CHR ''\<newline>'', CHR 0x0B,
   213     CHR 0x0C, CHR 0x0D, CHR 0x0E, CHR 0x0F,
   214     CHR 0x10, CHR 0x11, CHR 0x12, CHR 0x13,
   215     CHR 0x14, CHR 0x15, CHR 0x16, CHR 0x17,
   216     CHR 0x18, CHR 0x19, CHR 0x1A, CHR 0x1B,
   217     CHR 0x1C, CHR 0x1D, CHR 0x1E, CHR 0x1F,
   218     CHR '' '', CHR ''!'', CHR 0x22, CHR ''#'',
   219     CHR ''$'', CHR ''%'', CHR ''&'', CHR 0x27,
   220     CHR ''('', CHR '')'', CHR ''*'', CHR ''+'',
   221     CHR '','', CHR ''-'', CHR ''.'', CHR ''/'',
   222     CHR ''0'', CHR ''1'', CHR ''2'', CHR ''3'',
   223     CHR ''4'', CHR ''5'', CHR ''6'', CHR ''7'',
   224     CHR ''8'', CHR ''9'', CHR '':'', CHR '';'',
   225     CHR ''<'', CHR ''='', CHR ''>'', CHR ''?'',
   226     CHR ''@'', CHR ''A'', CHR ''B'', CHR ''C'',
   227     CHR ''D'', CHR ''E'', CHR ''F'', CHR ''G'',
   228     CHR ''H'', CHR ''I'', CHR ''J'', CHR ''K'',
   229     CHR ''L'', CHR ''M'', CHR ''N'', CHR ''O'',
   230     CHR ''P'', CHR ''Q'', CHR ''R'', CHR ''S'',
   231     CHR ''T'', CHR ''U'', CHR ''V'', CHR ''W'',
   232     CHR ''X'', CHR ''Y'', CHR ''Z'', CHR ''['',
   233     CHR 0x5C, CHR '']'', CHR ''^'', CHR ''_'',
   234     CHR 0x60, CHR ''a'', CHR ''b'', CHR ''c'',
   235     CHR ''d'', CHR ''e'', CHR ''f'', CHR ''g'',
   236     CHR ''h'', CHR ''i'', CHR ''j'', CHR ''k'',
   237     CHR ''l'', CHR ''m'', CHR ''n'', CHR ''o'',
   238     CHR ''p'', CHR ''q'', CHR ''r'', CHR ''s'',
   239     CHR ''t'', CHR ''u'', CHR ''v'', CHR ''w'',
   240     CHR ''x'', CHR ''y'', CHR ''z'', CHR ''{'',
   241     CHR ''|'', CHR ''}'', CHR ''~'', CHR 0x7F,
   242     CHR 0x80, CHR 0x81, CHR 0x82, CHR 0x83,
   243     CHR 0x84, CHR 0x85, CHR 0x86, CHR 0x87,
   244     CHR 0x88, CHR 0x89, CHR 0x8A, CHR 0x8B,
   245     CHR 0x8C, CHR 0x8D, CHR 0x8E, CHR 0x8F,
   246     CHR 0x90, CHR 0x91, CHR 0x92, CHR 0x93,
   247     CHR 0x94, CHR 0x95, CHR 0x96, CHR 0x97,
   248     CHR 0x98, CHR 0x99, CHR 0x9A, CHR 0x9B,
   249     CHR 0x9C, CHR 0x9D, CHR 0x9E, CHR 0x9F,
   250     CHR 0xA0, CHR 0xA1, CHR 0xA2, CHR 0xA3,
   251     CHR 0xA4, CHR 0xA5, CHR 0xA6, CHR 0xA7,
   252     CHR 0xA8, CHR 0xA9, CHR 0xAA, CHR 0xAB,
   253     CHR 0xAC, CHR 0xAD, CHR 0xAE, CHR 0xAF,
   254     CHR 0xB0, CHR 0xB1, CHR 0xB2, CHR 0xB3,
   255     CHR 0xB4, CHR 0xB5, CHR 0xB6, CHR 0xB7,
   256     CHR 0xB8, CHR 0xB9, CHR 0xBA, CHR 0xBB,
   257     CHR 0xBC, CHR 0xBD, CHR 0xBE, CHR 0xBF,
   258     CHR 0xC0, CHR 0xC1, CHR 0xC2, CHR 0xC3,
   259     CHR 0xC4, CHR 0xC5, CHR 0xC6, CHR 0xC7,
   260     CHR 0xC8, CHR 0xC9, CHR 0xCA, CHR 0xCB,
   261     CHR 0xCC, CHR 0xCD, CHR 0xCE, CHR 0xCF,
   262     CHR 0xD0, CHR 0xD1, CHR 0xD2, CHR 0xD3,
   263     CHR 0xD4, CHR 0xD5, CHR 0xD6, CHR 0xD7,
   264     CHR 0xD8, CHR 0xD9, CHR 0xDA, CHR 0xDB,
   265     CHR 0xDC, CHR 0xDD, CHR 0xDE, CHR 0xDF,
   266     CHR 0xE0, CHR 0xE1, CHR 0xE2, CHR 0xE3,
   267     CHR 0xE4, CHR 0xE5, CHR 0xE6, CHR 0xE7,
   268     CHR 0xE8, CHR 0xE9, CHR 0xEA, CHR 0xEB,
   269     CHR 0xEC, CHR 0xED, CHR 0xEE, CHR 0xEF,
   270     CHR 0xF0, CHR 0xF1, CHR 0xF2, CHR 0xF3,
   271     CHR 0xF4, CHR 0xF5, CHR 0xF6, CHR 0xF7,
   272     CHR 0xF8, CHR 0xF9, CHR 0xFA, CHR 0xFB,
   273     CHR 0xFC, CHR 0xFD, CHR 0xFE, CHR 0xFF]"
   274 
   275 definition
   276   "Enum.enum_all P \<longleftrightarrow> list_all P (Enum.enum :: char list)"
   277 
   278 definition
   279   "Enum.enum_ex P \<longleftrightarrow> list_ex P (Enum.enum :: char list)"
   280 
   281 lemma enum_char_unfold:
   282   "Enum.enum = map char_of_nat [0..<256]"
   283 proof -
   284   have *: "Suc (Suc 0) = 2" by simp
   285   have "map nat_of_char Enum.enum = [0..<256]"
   286     by (simp add: enum_char_def upt_conv_Cons_Cons *)
   287   then have "map char_of_nat (map nat_of_char Enum.enum) =
   288     map char_of_nat [0..<256]" by simp
   289   then show ?thesis by (simp add: comp_def)
   290 qed
   291 
   292 instance proof
   293   show UNIV: "UNIV = set (Enum.enum :: char list)"
   294     by (simp add: enum_char_unfold UNIV_char_of_nat atLeast0LessThan)
   295   show "distinct (Enum.enum :: char list)"
   296     by (auto simp add: enum_char_unfold distinct_map intro: inj_onI)
   297   show "\<And>P. Enum.enum_all P \<longleftrightarrow> Ball (UNIV :: char set) P"
   298     by (simp add: UNIV enum_all_char_def list_all_iff)
   299   show "\<And>P. Enum.enum_ex P \<longleftrightarrow> Bex (UNIV :: char set) P"
   300     by (simp add: UNIV enum_ex_char_def list_ex_iff)
   301 qed
   302 
   303 end
   304 
   305 lemma char_of_integer_code [code]:
   306   "char_of_integer n = Enum.enum ! (nat_of_integer n mod 256)"
   307   by (simp add: char_of_integer_def enum_char_unfold)
   308 
   309 lifting_update char.lifting
   310 lifting_forget char.lifting
   311 
   312 
   313 subsection \<open>Strings as dedicated type\<close>
   314 
   315 typedef literal = "UNIV :: string set"
   316   morphisms explode STR ..
   317 
   318 setup_lifting type_definition_literal
   319 
   320 lemma STR_inject' [simp]:
   321   "STR s = STR t \<longleftrightarrow> s = t"
   322   by transfer rule
   323 
   324 definition implode :: "string \<Rightarrow> String.literal"
   325 where
   326   [code del]: "implode = STR"
   327 
   328 instantiation literal :: size
   329 begin
   330 
   331 definition size_literal :: "literal \<Rightarrow> nat"
   332 where
   333   [code]: "size_literal (s::literal) = 0"
   334 
   335 instance ..
   336 
   337 end
   338 
   339 instantiation literal :: equal
   340 begin
   341 
   342 lift_definition equal_literal :: "literal \<Rightarrow> literal \<Rightarrow> bool" is "op =" .
   343 
   344 instance by intro_classes (transfer, simp)
   345 
   346 end
   347 
   348 declare equal_literal.rep_eq[code]
   349 
   350 lemma [code nbe]:
   351   fixes s :: "String.literal"
   352   shows "HOL.equal s s \<longleftrightarrow> True"
   353   by (simp add: equal)
   354 
   355 lifting_update literal.lifting
   356 lifting_forget literal.lifting
   357 
   358   
   359 subsection \<open>Dedicated conversion for generated computations\<close>
   360 
   361 definition char_of_num :: "num \<Rightarrow> char"
   362   where "char_of_num = char_of_nat \<circ> nat_of_num"
   363 
   364 lemma [code_computation_unfold]:
   365   "Char = char_of_num"
   366   by (simp add: fun_eq_iff char_of_num_def nat_of_num_numeral Char_def)
   367 
   368 
   369 subsection \<open>Code generator\<close>
   370 
   371 ML_file "Tools/string_code.ML"
   372 
   373 code_reserved SML string
   374 code_reserved OCaml string
   375 code_reserved Scala string
   376 
   377 code_printing
   378   type_constructor literal \<rightharpoonup>
   379     (SML) "string"
   380     and (OCaml) "string"
   381     and (Haskell) "String"
   382     and (Scala) "String"
   383 
   384 setup \<open>
   385   fold String_Code.add_literal_string ["SML", "OCaml", "Haskell", "Scala"]
   386 \<close>
   387 
   388 code_printing
   389   class_instance literal :: equal \<rightharpoonup>
   390     (Haskell) -
   391 | constant "HOL.equal :: literal \<Rightarrow> literal \<Rightarrow> bool" \<rightharpoonup>
   392     (SML) "!((_ : string) = _)"
   393     and (OCaml) "!((_ : string) = _)"
   394     and (Haskell) infix 4 "=="
   395     and (Scala) infixl 5 "=="
   396 
   397 setup \<open>Sign.map_naming (Name_Space.mandatory_path "Code")\<close>
   398 
   399 definition abort :: "literal \<Rightarrow> (unit \<Rightarrow> 'a) \<Rightarrow> 'a"
   400 where [simp, code del]: "abort _ f = f ()"
   401 
   402 lemma abort_cong: "msg = msg' ==> Code.abort msg f = Code.abort msg' f"
   403 by simp
   404 
   405 setup \<open>Sign.map_naming Name_Space.parent_path\<close>
   406 
   407 setup \<open>Code_Simp.map_ss (Simplifier.add_cong @{thm Code.abort_cong})\<close>
   408 
   409 code_printing constant Code.abort \<rightharpoonup>
   410     (SML) "!(raise/ Fail/ _)"
   411     and (OCaml) "failwith"
   412     and (Haskell) "!(error/ ::/ forall a./ String -> (() -> a) -> a)"
   413     and (Scala) "!{/ sys.error((_));/  ((_)).apply(())/ }"
   414 
   415 hide_type (open) literal
   416 
   417 hide_const (open) implode explode
   418 
   419 end