src/HOL/String.thy
author haftmann
Sun Mar 10 15:16:45 2019 +0000 (5 weeks ago)
changeset 69906 55534affe445
parent 69879 2731278dfff9
permissions -rw-r--r--
migrated from Nums to Zarith as library for OCaml integer arithmetic
     1 (* Author: Tobias Nipkow, Florian Haftmann, TU Muenchen *)
     2 
     3 section \<open>Character and string types\<close>
     4 
     5 theory String
     6 imports Enum
     7 begin
     8 
     9 subsection \<open>Strings as list of bytes\<close>
    10 
    11 text \<open>
    12   When modelling strings, we follow the approach given
    13   in \<^url>\<open>https://utf8everywhere.org/\<close>:
    14 
    15   \<^item> Strings are a list of bytes (8 bit).
    16 
    17   \<^item> Byte values from 0 to 127 are US-ASCII.
    18 
    19   \<^item> Byte values from 128 to 255 are uninterpreted blobs.
    20 \<close>
    21 
    22 subsubsection \<open>Bytes as datatype\<close>
    23 
    24 datatype char =
    25   Char (digit0: bool) (digit1: bool) (digit2: bool) (digit3: bool)
    26        (digit4: bool) (digit5: bool) (digit6: bool) (digit7: bool)
    27 
    28 context comm_semiring_1
    29 begin
    30 
    31 definition of_char :: "char \<Rightarrow> 'a"
    32   where "of_char c = ((((((of_bool (digit7 c) * 2
    33     + of_bool (digit6 c)) * 2
    34     + of_bool (digit5 c)) * 2
    35     + of_bool (digit4 c)) * 2
    36     + of_bool (digit3 c)) * 2
    37     + of_bool (digit2 c)) * 2
    38     + of_bool (digit1 c)) * 2
    39     + of_bool (digit0 c)"
    40 
    41 lemma of_char_Char [simp]:
    42   "of_char (Char b0 b1 b2 b3 b4 b5 b6 b7) =
    43     foldr (\<lambda>b k. of_bool b + k * 2) [b0, b1, b2, b3, b4, b5, b6, b7] 0"
    44   by (simp add: of_char_def ac_simps)
    45 
    46 end
    47 
    48 context semiring_parity
    49 begin
    50 
    51 definition char_of :: "'a \<Rightarrow> char"
    52   where "char_of n = Char (odd n) (odd (drop_bit 1 n))
    53     (odd (drop_bit 2 n)) (odd (drop_bit 3 n))
    54     (odd (drop_bit 4 n)) (odd (drop_bit 5 n))
    55     (odd (drop_bit 6 n)) (odd (drop_bit 7 n))"
    56 
    57 lemma char_of_char [simp]:
    58   "char_of (of_char c) = c"
    59 proof (cases c)
    60   have **: "drop_bit n (q * 2 + of_bool d) = drop_bit (n - 1) q + drop_bit n (of_bool d)"
    61     if "n > 0" for q :: 'a and n :: nat and d :: bool
    62     using that by (cases n) simp_all
    63   case (Char d0 d1 d2 d3 d4 d5 d6 d7)
    64   then show ?thesis
    65     by (simp only: of_char_def char_of_def char.simps char.sel drop_bit_of_bool **) simp
    66 qed
    67 
    68 lemma char_of_comp_of_char [simp]:
    69   "char_of \<circ> of_char = id"
    70   by (simp add: fun_eq_iff)
    71 
    72 lemma inj_of_char:
    73   "inj of_char"
    74 proof (rule injI)
    75   fix c d
    76   assume "of_char c = of_char d"
    77   then have "char_of (of_char c) = char_of (of_char d)"
    78     by simp
    79   then show "c = d"
    80     by simp
    81 qed
    82   
    83 lemma of_char_eq_iff [simp]:
    84   "of_char c = of_char d \<longleftrightarrow> c = d"
    85   by (simp add: inj_eq inj_of_char)
    86 
    87 lemma of_char_of [simp]:
    88   "of_char (char_of a) = a mod 256"
    89 proof -
    90   have *: "{0::nat..<8} = {0, 1, 2, 3, 4, 5, 6, 7}"
    91     by auto
    92   have "of_char (char_of (take_bit 8 a)) =
    93     (\<Sum>k\<in>{0, 1, 2, 3, 4, 5, 6, 7}. push_bit k (of_bool (odd (drop_bit k a))))"
    94     by (simp add: of_char_def char_of_def push_bit_of_1 drop_bit_take_bit)
    95   also have "\<dots> = take_bit 8 a"
    96     using * take_bit_sum [of 8 a] by simp
    97   also have "char_of(take_bit 8 a) = char_of a"
    98     by (simp add: char_of_def drop_bit_take_bit)
    99   finally show ?thesis
   100     by (simp add: take_bit_eq_mod)
   101 qed
   102 
   103 lemma char_of_mod_256 [simp]:
   104   "char_of (n mod 256) = char_of n"
   105   by (metis char_of_char of_char_of)
   106 
   107 lemma of_char_mod_256 [simp]:
   108   "of_char c mod 256 = of_char c"
   109   by (metis char_of_char of_char_of)
   110 
   111 lemma char_of_quasi_inj [simp]:
   112   "char_of m = char_of n \<longleftrightarrow> m mod 256 = n mod 256"
   113   by (metis char_of_mod_256 of_char_of)
   114 
   115 lemma char_of_nat_eq_iff:
   116   "char_of n = c \<longleftrightarrow> take_bit 8 n = of_char c"
   117   by (simp add: take_bit_eq_mod) (use of_char_eq_iff in fastforce)
   118 
   119 lemma char_of_nat [simp]:
   120   "char_of (of_nat n) = char_of n"
   121   by (simp add: char_of_def String.char_of_def drop_bit_of_nat)
   122 
   123 end
   124 
   125 lemma inj_on_char_of_nat [simp]:
   126   "inj_on char_of {0::nat..<256}"
   127   by (rule inj_onI) simp
   128 
   129 lemma nat_of_char_less_256 [simp]:
   130   "of_char c < (256 :: nat)"
   131 proof -
   132   have "of_char c mod (256 :: nat) < 256"
   133     by arith
   134   then show ?thesis by simp
   135 qed
   136 
   137 lemma range_nat_of_char:
   138   "range of_char = {0::nat..<256}"
   139 proof (rule; rule)
   140   fix n :: nat
   141   assume "n \<in> range of_char"
   142   then show "n \<in> {0..<256}"
   143     by auto
   144 next
   145   fix n :: nat
   146   assume "n \<in> {0..<256}"
   147   then have "n = of_char (char_of n)"
   148     by simp
   149   then show "n \<in> range of_char"
   150     by (rule range_eqI)
   151 qed
   152 
   153 lemma UNIV_char_of_nat:
   154   "UNIV = char_of ` {0::nat..<256}"
   155 proof -
   156   have "range (of_char :: char \<Rightarrow> nat) = of_char ` char_of ` {0::nat..<256}"
   157     by (auto simp add: range_nat_of_char intro!: image_eqI)
   158   with inj_of_char [where ?'a = nat] show ?thesis 
   159     by (simp add: inj_image_eq_iff)
   160 qed
   161 
   162 lemma card_UNIV_char:
   163   "card (UNIV :: char set) = 256"
   164   by (auto simp add: UNIV_char_of_nat card_image)
   165 
   166 context
   167   includes lifting_syntax integer.lifting natural.lifting
   168 begin
   169 
   170 lemma [transfer_rule]:
   171   "(pcr_integer ===> (=)) (char_of :: int \<Rightarrow> char) (char_of :: integer \<Rightarrow> char)"
   172   by (unfold char_of_def [abs_def]) transfer_prover
   173 
   174 lemma [transfer_rule]:
   175   "((=) ===> pcr_integer) (of_char :: char \<Rightarrow> int) (of_char :: char \<Rightarrow> integer)"
   176   by (unfold of_char_def [abs_def]) transfer_prover
   177 
   178 lemma [transfer_rule]:
   179   "(pcr_natural ===> (=)) (char_of :: nat \<Rightarrow> char) (char_of :: natural \<Rightarrow> char)"
   180   by (unfold char_of_def [abs_def]) transfer_prover
   181 
   182 lemma [transfer_rule]:
   183   "((=) ===> pcr_natural) (of_char :: char \<Rightarrow> nat) (of_char :: char \<Rightarrow> natural)"
   184   by (unfold of_char_def [abs_def]) transfer_prover
   185 
   186 end
   187 
   188 lifting_update integer.lifting
   189 lifting_forget integer.lifting
   190 
   191 lifting_update natural.lifting
   192 lifting_forget natural.lifting
   193 
   194 syntax
   195   "_Char" :: "str_position \<Rightarrow> char"    ("CHR _")
   196   "_Char_ord" :: "num_const \<Rightarrow> char"   ("CHR _")
   197 
   198 type_synonym string = "char list"
   199 
   200 syntax
   201   "_String" :: "str_position \<Rightarrow> string"    ("_")
   202 
   203 ML_file \<open>Tools/string_syntax.ML\<close>
   204 
   205 instantiation char :: enum
   206 begin
   207 
   208 definition
   209   "Enum.enum = [
   210     CHR 0x00, CHR 0x01, CHR 0x02, CHR 0x03,
   211     CHR 0x04, CHR 0x05, CHR 0x06, CHR 0x07,
   212     CHR 0x08, CHR 0x09, CHR ''\<newline>'', CHR 0x0B,
   213     CHR 0x0C, CHR 0x0D, CHR 0x0E, CHR 0x0F,
   214     CHR 0x10, CHR 0x11, CHR 0x12, CHR 0x13,
   215     CHR 0x14, CHR 0x15, CHR 0x16, CHR 0x17,
   216     CHR 0x18, CHR 0x19, CHR 0x1A, CHR 0x1B,
   217     CHR 0x1C, CHR 0x1D, CHR 0x1E, CHR 0x1F,
   218     CHR '' '', CHR ''!'', CHR 0x22, CHR ''#'',
   219     CHR ''$'', CHR ''%'', CHR ''&'', CHR 0x27,
   220     CHR ''('', CHR '')'', CHR ''*'', CHR ''+'',
   221     CHR '','', CHR ''-'', CHR ''.'', CHR ''/'',
   222     CHR ''0'', CHR ''1'', CHR ''2'', CHR ''3'',
   223     CHR ''4'', CHR ''5'', CHR ''6'', CHR ''7'',
   224     CHR ''8'', CHR ''9'', CHR '':'', CHR '';'',
   225     CHR ''<'', CHR ''='', CHR ''>'', CHR ''?'',
   226     CHR ''@'', CHR ''A'', CHR ''B'', CHR ''C'',
   227     CHR ''D'', CHR ''E'', CHR ''F'', CHR ''G'',
   228     CHR ''H'', CHR ''I'', CHR ''J'', CHR ''K'',
   229     CHR ''L'', CHR ''M'', CHR ''N'', CHR ''O'',
   230     CHR ''P'', CHR ''Q'', CHR ''R'', CHR ''S'',
   231     CHR ''T'', CHR ''U'', CHR ''V'', CHR ''W'',
   232     CHR ''X'', CHR ''Y'', CHR ''Z'', CHR ''['',
   233     CHR 0x5C, CHR '']'', CHR ''^'', CHR ''_'',
   234     CHR 0x60, CHR ''a'', CHR ''b'', CHR ''c'',
   235     CHR ''d'', CHR ''e'', CHR ''f'', CHR ''g'',
   236     CHR ''h'', CHR ''i'', CHR ''j'', CHR ''k'',
   237     CHR ''l'', CHR ''m'', CHR ''n'', CHR ''o'',
   238     CHR ''p'', CHR ''q'', CHR ''r'', CHR ''s'',
   239     CHR ''t'', CHR ''u'', CHR ''v'', CHR ''w'',
   240     CHR ''x'', CHR ''y'', CHR ''z'', CHR ''{'',
   241     CHR ''|'', CHR ''}'', CHR ''~'', CHR 0x7F,
   242     CHR 0x80, CHR 0x81, CHR 0x82, CHR 0x83,
   243     CHR 0x84, CHR 0x85, CHR 0x86, CHR 0x87,
   244     CHR 0x88, CHR 0x89, CHR 0x8A, CHR 0x8B,
   245     CHR 0x8C, CHR 0x8D, CHR 0x8E, CHR 0x8F,
   246     CHR 0x90, CHR 0x91, CHR 0x92, CHR 0x93,
   247     CHR 0x94, CHR 0x95, CHR 0x96, CHR 0x97,
   248     CHR 0x98, CHR 0x99, CHR 0x9A, CHR 0x9B,
   249     CHR 0x9C, CHR 0x9D, CHR 0x9E, CHR 0x9F,
   250     CHR 0xA0, CHR 0xA1, CHR 0xA2, CHR 0xA3,
   251     CHR 0xA4, CHR 0xA5, CHR 0xA6, CHR 0xA7,
   252     CHR 0xA8, CHR 0xA9, CHR 0xAA, CHR 0xAB,
   253     CHR 0xAC, CHR 0xAD, CHR 0xAE, CHR 0xAF,
   254     CHR 0xB0, CHR 0xB1, CHR 0xB2, CHR 0xB3,
   255     CHR 0xB4, CHR 0xB5, CHR 0xB6, CHR 0xB7,
   256     CHR 0xB8, CHR 0xB9, CHR 0xBA, CHR 0xBB,
   257     CHR 0xBC, CHR 0xBD, CHR 0xBE, CHR 0xBF,
   258     CHR 0xC0, CHR 0xC1, CHR 0xC2, CHR 0xC3,
   259     CHR 0xC4, CHR 0xC5, CHR 0xC6, CHR 0xC7,
   260     CHR 0xC8, CHR 0xC9, CHR 0xCA, CHR 0xCB,
   261     CHR 0xCC, CHR 0xCD, CHR 0xCE, CHR 0xCF,
   262     CHR 0xD0, CHR 0xD1, CHR 0xD2, CHR 0xD3,
   263     CHR 0xD4, CHR 0xD5, CHR 0xD6, CHR 0xD7,
   264     CHR 0xD8, CHR 0xD9, CHR 0xDA, CHR 0xDB,
   265     CHR 0xDC, CHR 0xDD, CHR 0xDE, CHR 0xDF,
   266     CHR 0xE0, CHR 0xE1, CHR 0xE2, CHR 0xE3,
   267     CHR 0xE4, CHR 0xE5, CHR 0xE6, CHR 0xE7,
   268     CHR 0xE8, CHR 0xE9, CHR 0xEA, CHR 0xEB,
   269     CHR 0xEC, CHR 0xED, CHR 0xEE, CHR 0xEF,
   270     CHR 0xF0, CHR 0xF1, CHR 0xF2, CHR 0xF3,
   271     CHR 0xF4, CHR 0xF5, CHR 0xF6, CHR 0xF7,
   272     CHR 0xF8, CHR 0xF9, CHR 0xFA, CHR 0xFB,
   273     CHR 0xFC, CHR 0xFD, CHR 0xFE, CHR 0xFF]"
   274 
   275 definition
   276   "Enum.enum_all P \<longleftrightarrow> list_all P (Enum.enum :: char list)"
   277 
   278 definition
   279   "Enum.enum_ex P \<longleftrightarrow> list_ex P (Enum.enum :: char list)"
   280 
   281 lemma enum_char_unfold:
   282   "Enum.enum = map char_of [0..<256]"
   283 proof -
   284   have "map (of_char :: char \<Rightarrow> nat) Enum.enum = [0..<256]"
   285     by (simp add: enum_char_def of_char_def upt_conv_Cons_Cons numeral_2_eq_2 [symmetric])
   286   then have "map char_of (map (of_char :: char \<Rightarrow> nat) Enum.enum) =
   287     map char_of [0..<256]"
   288     by simp
   289   then show ?thesis
   290     by simp
   291 qed
   292 
   293 instance proof
   294   show UNIV: "UNIV = set (Enum.enum :: char list)"
   295     by (simp add: enum_char_unfold UNIV_char_of_nat atLeast0LessThan)
   296   show "distinct (Enum.enum :: char list)"
   297     by (auto simp add: enum_char_unfold distinct_map intro: inj_onI)
   298   show "\<And>P. Enum.enum_all P \<longleftrightarrow> Ball (UNIV :: char set) P"
   299     by (simp add: UNIV enum_all_char_def list_all_iff)
   300   show "\<And>P. Enum.enum_ex P \<longleftrightarrow> Bex (UNIV :: char set) P"
   301     by (simp add: UNIV enum_ex_char_def list_ex_iff)
   302 qed
   303 
   304 end
   305 
   306 lemma linorder_char:
   307   "class.linorder (\<lambda>c d. of_char c \<le> (of_char d :: nat)) (\<lambda>c d. of_char c < (of_char d :: nat))"
   308   by standard auto
   309 
   310 text \<open>Optimized version for execution\<close>
   311 
   312 definition char_of_integer :: "integer \<Rightarrow> char"
   313   where [code_abbrev]: "char_of_integer = char_of"
   314 
   315 definition integer_of_char :: "char \<Rightarrow> integer"
   316   where [code_abbrev]: "integer_of_char = of_char"
   317 
   318 lemma char_of_integer_code [code]:
   319   "char_of_integer k = (let
   320      (q0, b0) = bit_cut_integer k;
   321      (q1, b1) = bit_cut_integer q0;
   322      (q2, b2) = bit_cut_integer q1;
   323      (q3, b3) = bit_cut_integer q2;
   324      (q4, b4) = bit_cut_integer q3;
   325      (q5, b5) = bit_cut_integer q4;
   326      (q6, b6) = bit_cut_integer q5;
   327      (_, b7) = bit_cut_integer q6
   328     in Char b0 b1 b2 b3 b4 b5 b6 b7)"
   329   by (simp add: bit_cut_integer_def char_of_integer_def char_of_def div_mult2_numeral_eq odd_iff_mod_2_eq_one drop_bit_eq_div)
   330 
   331 lemma integer_of_char_code [code]:
   332   "integer_of_char (Char b0 b1 b2 b3 b4 b5 b6 b7) =
   333     ((((((of_bool b7 * 2 + of_bool b6) * 2 +
   334       of_bool b5) * 2 + of_bool b4) * 2 +
   335         of_bool b3) * 2 + of_bool b2) * 2 +
   336           of_bool b1) * 2 + of_bool b0"
   337   by (simp only: integer_of_char_def of_char_def char.sel)
   338 
   339 
   340 subsection \<open>Strings as dedicated type for target language code generation\<close>
   341 
   342 subsubsection \<open>Logical specification\<close>
   343 
   344 context
   345 begin
   346 
   347 qualified definition ascii_of :: "char \<Rightarrow> char"
   348   where "ascii_of c = Char (digit0 c) (digit1 c) (digit2 c) (digit3 c) (digit4 c) (digit5 c) (digit6 c) False"
   349 
   350 qualified lemma ascii_of_Char [simp]:
   351   "ascii_of (Char b0 b1 b2 b3 b4 b5 b6 b7) = Char b0 b1 b2 b3 b4 b5 b6 False"
   352   by (simp add: ascii_of_def)
   353 
   354 qualified lemma not_digit7_ascii_of [simp]:
   355   "\<not> digit7 (ascii_of c)"
   356   by (simp add: ascii_of_def)
   357 
   358 qualified lemma ascii_of_idem:
   359   "ascii_of c = c" if "\<not> digit7 c"
   360   using that by (cases c) simp
   361 
   362 qualified lemma char_of_ascii_of [simp]:
   363   "of_char (ascii_of c) = take_bit 7 (of_char c :: nat)"
   364   by (cases c)
   365     (simp add: numeral_3_eq_3 [symmetric] numeral_2_eq_2 [symmetric])
   366 
   367 qualified typedef literal = "{cs. \<forall>c\<in>set cs. \<not> digit7 c}"
   368   morphisms explode Abs_literal
   369 proof
   370   show "[] \<in> {cs. \<forall>c\<in>set cs. \<not> digit7 c}"
   371     by simp
   372 qed
   373 
   374 qualified setup_lifting type_definition_literal
   375 
   376 qualified lift_definition implode :: "string \<Rightarrow> literal"
   377   is "map ascii_of"
   378   by auto
   379 
   380 qualified lemma implode_explode_eq [simp]:
   381   "String.implode (String.explode s) = s"
   382 proof transfer
   383   fix cs
   384   show "map ascii_of cs = cs" if "\<forall>c\<in>set cs. \<not> digit7 c"
   385     using that
   386       by (induction cs) (simp_all add: ascii_of_idem)
   387 qed
   388 
   389 qualified lemma explode_implode_eq [simp]:
   390   "String.explode (String.implode cs) = map ascii_of cs"
   391   by transfer rule
   392 
   393 end
   394 
   395 
   396 subsubsection \<open>Syntactic representation\<close>
   397 
   398 text \<open>
   399   Logical ground representations for literals are:
   400 
   401   \<^enum> \<open>0\<close> for the empty literal;
   402 
   403   \<^enum> \<open>Literal b0 \<dots> b6 s\<close> for a literal starting with one
   404     character and continued by another literal.
   405 
   406   Syntactic representations for literals are:
   407 
   408   \<^enum> Printable text as string prefixed with \<open>STR\<close>;
   409 
   410   \<^enum> A single ascii value as numerical hexadecimal value prefixed with \<open>STR\<close>.
   411 \<close>
   412 
   413 instantiation String.literal :: zero
   414 begin
   415 
   416 context
   417 begin
   418 
   419 qualified lift_definition zero_literal :: String.literal
   420   is Nil
   421   by simp
   422 
   423 instance ..
   424 
   425 end
   426 
   427 end
   428 
   429 context
   430 begin
   431 
   432 qualified abbreviation (output) empty_literal :: String.literal
   433   where "empty_literal \<equiv> 0"
   434 
   435 qualified lift_definition Literal :: "bool \<Rightarrow> bool \<Rightarrow> bool \<Rightarrow> bool \<Rightarrow> bool \<Rightarrow> bool \<Rightarrow> bool \<Rightarrow> String.literal \<Rightarrow> String.literal"
   436   is "\<lambda>b0 b1 b2 b3 b4 b5 b6 cs. Char b0 b1 b2 b3 b4 b5 b6 False # cs"
   437   by auto
   438 
   439 qualified lemma Literal_eq_iff [simp]:
   440   "Literal b0 b1 b2 b3 b4 b5 b6 s = Literal c0 c1 c2 c3 c4 c5 c6 t
   441      \<longleftrightarrow> (b0 \<longleftrightarrow> c0) \<and> (b1 \<longleftrightarrow> c1) \<and> (b2 \<longleftrightarrow> c2) \<and> (b3 \<longleftrightarrow> c3)
   442          \<and> (b4 \<longleftrightarrow> c4) \<and> (b5 \<longleftrightarrow> c5) \<and> (b6 \<longleftrightarrow> c6) \<and> s = t"
   443   by transfer simp
   444 
   445 qualified lemma empty_neq_Literal [simp]:
   446   "empty_literal \<noteq> Literal b0 b1 b2 b3 b4 b5 b6 s"
   447   by transfer simp
   448 
   449 qualified lemma Literal_neq_empty [simp]:
   450   "Literal b0 b1 b2 b3 b4 b5 b6 s \<noteq> empty_literal"
   451   by transfer simp
   452 
   453 end
   454 
   455 code_datatype "0 :: String.literal" String.Literal
   456 
   457 syntax
   458   "_Literal" :: "str_position \<Rightarrow> String.literal"   ("STR _")
   459   "_Ascii" :: "num_const \<Rightarrow> String.literal"        ("STR _")
   460 
   461 ML_file \<open>Tools/literal.ML\<close>
   462 
   463 
   464 subsubsection \<open>Operations\<close>
   465 
   466 instantiation String.literal :: plus
   467 begin
   468 
   469 context
   470 begin
   471 
   472 qualified lift_definition plus_literal :: "String.literal \<Rightarrow> String.literal \<Rightarrow> String.literal"
   473   is "(@)"
   474   by auto
   475 
   476 instance ..
   477 
   478 end
   479 
   480 end
   481 
   482 instance String.literal :: monoid_add
   483   by (standard; transfer) simp_all
   484 
   485 instantiation String.literal :: size
   486 begin
   487 
   488 context
   489   includes literal.lifting
   490 begin
   491 
   492 lift_definition size_literal :: "String.literal \<Rightarrow> nat"
   493   is length .
   494 
   495 end
   496 
   497 instance ..
   498 
   499 end
   500 
   501 instantiation String.literal :: equal
   502 begin
   503 
   504 context
   505 begin
   506 
   507 qualified lift_definition equal_literal :: "String.literal \<Rightarrow> String.literal \<Rightarrow> bool"
   508   is HOL.equal .
   509 
   510 instance
   511   by (standard; transfer) (simp add: equal)
   512 
   513 end
   514 
   515 end
   516 
   517 instantiation String.literal :: linorder
   518 begin
   519 
   520 context
   521 begin
   522 
   523 qualified lift_definition less_eq_literal :: "String.literal \<Rightarrow> String.literal \<Rightarrow> bool"
   524   is "ord.lexordp_eq (\<lambda>c d. of_char c < (of_char d :: nat))"
   525   .
   526 
   527 qualified lift_definition less_literal :: "String.literal \<Rightarrow> String.literal \<Rightarrow> bool"
   528   is "ord.lexordp (\<lambda>c d. of_char c < (of_char d :: nat))"
   529   .
   530 
   531 instance proof -
   532   from linorder_char interpret linorder "ord.lexordp_eq (\<lambda>c d. of_char c < (of_char d :: nat))"
   533     "ord.lexordp (\<lambda>c d. of_char c < (of_char d :: nat)) :: string \<Rightarrow> string \<Rightarrow> bool"
   534     by (rule linorder.lexordp_linorder)
   535   show "PROP ?thesis"
   536     by (standard; transfer) (simp_all add: less_le_not_le linear)
   537 qed
   538 
   539 end
   540 
   541 end
   542 
   543 lemma infinite_literal:
   544   "infinite (UNIV :: String.literal set)"
   545 proof -
   546   define S where "S = range (\<lambda>n. replicate n CHR ''A'')"
   547   have "inj_on String.implode S"
   548   proof (rule inj_onI)
   549     fix cs ds
   550     assume "String.implode cs = String.implode ds"
   551     then have "String.explode (String.implode cs) = String.explode (String.implode ds)"
   552       by simp
   553     moreover assume "cs \<in> S" and "ds \<in> S"
   554     ultimately show "cs = ds"
   555       by (auto simp add: S_def)
   556   qed
   557   moreover have "infinite S"
   558     by (auto simp add: S_def dest: finite_range_imageI [of _ length])
   559   ultimately have "infinite (String.implode ` S)"
   560     by (simp add: finite_image_iff)
   561   then show ?thesis
   562     by (auto intro: finite_subset)
   563 qed
   564 
   565 
   566 subsubsection \<open>Executable conversions\<close>
   567 
   568 context
   569 begin
   570 
   571 qualified lift_definition asciis_of_literal :: "String.literal \<Rightarrow> integer list"
   572   is "map of_char"
   573   .
   574 
   575 qualified lemma asciis_of_zero [simp, code]:
   576   "asciis_of_literal 0 = []"
   577   by transfer simp
   578 
   579 qualified lemma asciis_of_Literal [simp, code]:
   580   "asciis_of_literal (String.Literal b0 b1 b2 b3 b4 b5 b6 s) =
   581     of_char (Char b0 b1 b2 b3 b4 b5 b6 False) # asciis_of_literal s "
   582   by transfer simp
   583 
   584 qualified lift_definition literal_of_asciis :: "integer list \<Rightarrow> String.literal"
   585   is "map (String.ascii_of \<circ> char_of)"
   586   by auto
   587 
   588 qualified lemma literal_of_asciis_Nil [simp, code]:
   589   "literal_of_asciis [] = 0"
   590   by transfer simp
   591 
   592 qualified lemma literal_of_asciis_Cons [simp, code]:
   593   "literal_of_asciis (k # ks) = (case char_of k
   594     of Char b0 b1 b2 b3 b4 b5 b6 b7 \<Rightarrow> String.Literal b0 b1 b2 b3 b4 b5 b6 (literal_of_asciis ks))"
   595   by (simp add: char_of_def) (transfer, simp add: char_of_def)
   596 
   597 qualified lemma literal_of_asciis_of_literal [simp]:
   598   "literal_of_asciis (asciis_of_literal s) = s"
   599 proof transfer
   600   fix cs
   601   assume "\<forall>c\<in>set cs. \<not> digit7 c"
   602   then show "map (String.ascii_of \<circ> char_of) (map of_char cs) = cs"
   603     by (induction cs) (simp_all add: String.ascii_of_idem) 
   604 qed
   605 
   606 qualified lemma explode_code [code]:
   607   "String.explode s = map char_of (asciis_of_literal s)"
   608   by transfer simp
   609 
   610 qualified lemma implode_code [code]:
   611   "String.implode cs = literal_of_asciis (map of_char cs)"
   612   by transfer simp
   613 
   614 qualified lemma equal_literal [code]:
   615   "HOL.equal (String.Literal b0 b1 b2 b3 b4 b5 b6 s)
   616     (String.Literal a0 a1 a2 a3 a4 a5 a6 r)
   617     \<longleftrightarrow> (b0 \<longleftrightarrow> a0) \<and> (b1 \<longleftrightarrow> a1) \<and> (b2 \<longleftrightarrow> a2) \<and> (b3 \<longleftrightarrow> a3)
   618       \<and> (b4 \<longleftrightarrow> a4) \<and> (b5 \<longleftrightarrow> a5) \<and> (b6 \<longleftrightarrow> a6) \<and> (s = r)"
   619   by (simp add: equal)
   620 
   621 end
   622 
   623 
   624 subsubsection \<open>Technical code generation setup\<close>
   625 
   626 text \<open>Alternative constructor for generated computations\<close>
   627 
   628 context
   629 begin  
   630 
   631 qualified definition Literal' :: "bool \<Rightarrow> bool \<Rightarrow> bool \<Rightarrow> bool \<Rightarrow> bool \<Rightarrow> bool \<Rightarrow> bool \<Rightarrow> String.literal \<Rightarrow> String.literal"
   632   where [simp]: "Literal' = String.Literal"
   633 
   634 lemma [code]:
   635   "Literal' b0 b1 b2 b3 b4 b5 b6 s = String.literal_of_asciis
   636     [foldr (\<lambda>b k. of_bool b + k * 2) [b0, b1, b2, b3, b4, b5, b6] 0] + s" 
   637   unfolding Literal'_def by transfer (simp add: char_of_def)
   638 
   639 lemma [code_computation_unfold]:
   640   "String.Literal = Literal'"
   641   by simp
   642 
   643 end
   644 
   645 code_reserved SML string String Char List
   646 code_reserved OCaml string String Char List
   647 code_reserved Haskell Prelude
   648 code_reserved Scala string
   649 
   650 code_printing
   651   type_constructor String.literal \<rightharpoonup>
   652     (SML) "string"
   653     and (OCaml) "string"
   654     and (Haskell) "String"
   655     and (Scala) "String"
   656 | constant "STR ''''" \<rightharpoonup>
   657     (SML) "\"\""
   658     and (OCaml) "\"\""
   659     and (Haskell) "\"\""
   660     and (Scala) "\"\""
   661 
   662 setup \<open>
   663   fold Literal.add_code ["SML", "OCaml", "Haskell", "Scala"]
   664 \<close>
   665 
   666 code_printing
   667   constant "(+) :: String.literal \<Rightarrow> String.literal \<Rightarrow> String.literal" \<rightharpoonup>
   668     (SML) infixl 18 "^"
   669     and (OCaml) infixr 6 "^"
   670     and (Haskell) infixr 5 "++"
   671     and (Scala) infixl 7 "+"
   672 | constant String.literal_of_asciis \<rightharpoonup>
   673     (SML) "!(String.implode/ o List.map (fn k => if 0 <= k andalso k < 128 then (Char.chr o IntInf.toInt) k else raise Fail \"Non-ASCII character in literal\"))"
   674     and (OCaml) "!(let xs = _
   675       and chr k =
   676         let l = Z.to'_int k
   677           in if 0 <= l && l < 128
   678           then Char.chr l
   679           else failwith \"Non-ASCII character in literal\"
   680       in String.init (List.length xs) (List.nth (List.map chr xs)))"
   681     and (Haskell) "map/ (let chr k | (0 <= k && k < 128) = Prelude.toEnum k :: Prelude.Char in chr . Prelude.fromInteger)"
   682     and (Scala) "\"\"/ ++/ _.map((k: BigInt) => if (BigInt(0) <= k && k < BigInt(128)) k.charValue else sys.error(\"Non-ASCII character in literal\"))"
   683 | constant String.asciis_of_literal \<rightharpoonup>
   684     (SML) "!(List.map (fn c => let val k = Char.ord c in if k < 128 then IntInf.fromInt k else raise Fail \"Non-ASCII character in literal\" end) /o String.explode)"
   685     and (OCaml) "!(let s = _ in let rec exp i l = if i < 0 then l else exp (i - 1) (let k = Char.code (String.get s i) in
   686       if k < 128 then Z.of'_int k :: l else failwith \"Non-ASCII character in literal\") in exp (String.length s - 1) [])"
   687     and (Haskell) "map/ (let ord k | (k < 128) = Prelude.toInteger k in ord . (Prelude.fromEnum :: Prelude.Char -> Prelude.Int))"
   688     and (Scala) "!(_.toList.map(c => { val k: Int = c.toInt; if (k < 128) BigInt(k) else sys.error(\"Non-ASCII character in literal\") }))"
   689 | class_instance String.literal :: equal \<rightharpoonup>
   690     (Haskell) -
   691 | constant "HOL.equal :: String.literal \<Rightarrow> String.literal \<Rightarrow> bool" \<rightharpoonup>
   692     (SML) "!((_ : string) = _)"
   693     and (OCaml) "!((_ : string) = _)"
   694     and (Haskell) infix 4 "=="
   695     and (Scala) infixl 5 "=="
   696 | constant "(\<le>) :: String.literal \<Rightarrow> String.literal \<Rightarrow> bool" \<rightharpoonup>
   697     (SML) "!((_ : string) <= _)"
   698     and (OCaml) "!((_ : string) <= _)"
   699     and (Haskell) infix 4 "<="
   700     \<comment> \<open>Order operations for \<^typ>\<open>String.literal\<close> work in Haskell only
   701           if no type class instance needs to be generated, because String = [Char] in Haskell
   702           and \<^typ>\<open>char list\<close> need not have the same order as \<^typ>\<open>String.literal\<close>.\<close>
   703     and (Scala) infixl 4 "<="
   704     and (Eval) infixl 6 "<="
   705 | constant "(<) :: String.literal \<Rightarrow> String.literal \<Rightarrow> bool" \<rightharpoonup>
   706     (SML) "!((_ : string) < _)"
   707     and (OCaml) "!((_ : string) < _)"
   708     and (Haskell) infix 4 "<"
   709     and (Scala) infixl 4 "<"
   710     and (Eval) infixl 6 "<"
   711 
   712 
   713 subsubsection \<open>Code generation utility\<close>
   714 
   715 setup \<open>Sign.map_naming (Name_Space.mandatory_path "Code")\<close>
   716 
   717 definition abort :: "String.literal \<Rightarrow> (unit \<Rightarrow> 'a) \<Rightarrow> 'a"
   718   where [simp]: "abort _ f = f ()"
   719 
   720 declare [[code drop: Code.abort]]
   721 
   722 lemma abort_cong:
   723   "msg = msg' \<Longrightarrow> Code.abort msg f = Code.abort msg' f"
   724   by simp
   725 
   726 setup \<open>Sign.map_naming Name_Space.parent_path\<close>
   727 
   728 setup \<open>Code_Simp.map_ss (Simplifier.add_cong @{thm Code.abort_cong})\<close>
   729 
   730 code_printing
   731   constant Code.abort \<rightharpoonup>
   732     (SML) "!(raise/ Fail/ _)"
   733     and (OCaml) "failwith"
   734     and (Haskell) "!(error/ ::/ forall a./ String -> (() -> a) -> a)"
   735     and (Scala) "!{/ sys.error((_));/ ((_)).apply(())/ }"
   736 
   737 
   738 subsubsection \<open>Finally\<close>
   739 
   740 lifting_update literal.lifting
   741 lifting_forget literal.lifting
   742 
   743 end