src/HOL/String.thy
changeset 31051 4d9b52e0a48c
child 31055 2cf6efca6c71
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/HOL/String.thy	Wed May 06 16:01:23 2009 +0200
     1.3 @@ -0,0 +1,291 @@
     1.4 +(* Author: Tobias Nipkow, Florian Haftmann, TU Muenchen *)
     1.5 +
     1.6 +header {* Character and string types *}
     1.7 +
     1.8 +theory String
     1.9 +imports List
    1.10 +uses "Tools/string_syntax.ML"
    1.11 +begin
    1.12 +
    1.13 +subsection {* Characters *}
    1.14 +
    1.15 +datatype nibble =
    1.16 +    Nibble0 | Nibble1 | Nibble2 | Nibble3 | Nibble4 | Nibble5 | Nibble6 | Nibble7
    1.17 +  | Nibble8 | Nibble9 | NibbleA | NibbleB | NibbleC | NibbleD | NibbleE | NibbleF
    1.18 +
    1.19 +lemma UNIV_nibble:
    1.20 +  "UNIV = {Nibble0, Nibble1, Nibble2, Nibble3, Nibble4, Nibble5, Nibble6, Nibble7,
    1.21 +    Nibble8, Nibble9, NibbleA, NibbleB, NibbleC, NibbleD, NibbleE, NibbleF}" (is "_ = ?A")
    1.22 +proof (rule UNIV_eq_I)
    1.23 +  fix x show "x \<in> ?A" by (cases x) simp_all
    1.24 +qed
    1.25 +
    1.26 +instance nibble :: finite
    1.27 +  by default (simp add: UNIV_nibble)
    1.28 +
    1.29 +datatype char = Char nibble nibble
    1.30 +  -- "Note: canonical order of character encoding coincides with standard term ordering"
    1.31 +
    1.32 +lemma UNIV_char:
    1.33 +  "UNIV = image (split Char) (UNIV \<times> UNIV)"
    1.34 +proof (rule UNIV_eq_I)
    1.35 +  fix x show "x \<in> image (split Char) (UNIV \<times> UNIV)" by (cases x) auto
    1.36 +qed
    1.37 +
    1.38 +instance char :: finite
    1.39 +  by default (simp add: UNIV_char)
    1.40 +
    1.41 +lemma size_char [code, simp]:
    1.42 +  "size (c::char) = 0" by (cases c) simp
    1.43 +
    1.44 +lemma char_size [code, simp]:
    1.45 +  "char_size (c::char) = 0" by (cases c) simp
    1.46 +
    1.47 +primrec nibble_pair_of_char :: "char \<Rightarrow> nibble \<times> nibble" where
    1.48 +  "nibble_pair_of_char (Char n m) = (n, m)"
    1.49 +
    1.50 +declare nibble_pair_of_char.simps [code del]
    1.51 +
    1.52 +setup {*
    1.53 +let
    1.54 +  val nibbles = map (Thm.cterm_of @{theory} o HOLogic.mk_nibble) (0 upto 15);
    1.55 +  val thms = map_product
    1.56 +   (fn n => fn m => Drule.instantiate' [] [SOME n, SOME m] @{thm nibble_pair_of_char.simps})
    1.57 +      nibbles nibbles;
    1.58 +in
    1.59 +  PureThy.note_thmss Thm.lemmaK [((Binding.name "nibble_pair_of_char_simps", []), [(thms, [])])]
    1.60 +  #-> (fn [(_, thms)] => fold_rev Code.add_eqn thms)
    1.61 +end
    1.62 +*}
    1.63 +
    1.64 +lemma char_case_nibble_pair [code, code inline]:
    1.65 +  "char_case f = split f o nibble_pair_of_char"
    1.66 +  by (simp add: expand_fun_eq split: char.split)
    1.67 +
    1.68 +lemma char_rec_nibble_pair [code, code inline]:
    1.69 +  "char_rec f = split f o nibble_pair_of_char"
    1.70 +  unfolding char_case_nibble_pair [symmetric]
    1.71 +  by (simp add: expand_fun_eq split: char.split)
    1.72 +
    1.73 +syntax
    1.74 +  "_Char" :: "xstr => char"    ("CHR _")
    1.75 +
    1.76 +
    1.77 +subsection {* Strings *}
    1.78 +
    1.79 +types string = "char list"
    1.80 +
    1.81 +syntax
    1.82 +  "_String" :: "xstr => string"    ("_")
    1.83 +
    1.84 +setup StringSyntax.setup
    1.85 +
    1.86 +
    1.87 +subsection {* Strings as dedicated datatype *}
    1.88 +
    1.89 +datatype message_string = STR string
    1.90 +
    1.91 +lemmas [code del] =
    1.92 +  message_string.recs message_string.cases
    1.93 +
    1.94 +lemma [code]: "size (s\<Colon>message_string) = 0"
    1.95 +  by (cases s) simp_all
    1.96 +
    1.97 +lemma [code]: "message_string_size (s\<Colon>message_string) = 0"
    1.98 +  by (cases s) simp_all
    1.99 +
   1.100 +
   1.101 +subsection {* Code generator *}
   1.102 +
   1.103 +text {* This also covers pretty syntax for list literals. *}
   1.104 +
   1.105 +ML {*
   1.106 +local
   1.107 +
   1.108 +open Basic_Code_Thingol;
   1.109 +
   1.110 +fun implode_list naming t = case pairself
   1.111 +  (Code_Thingol.lookup_const naming) (@{const_name Nil}, @{const_name Cons})
   1.112 +   of (SOME nil', SOME cons') => let
   1.113 +          fun dest_cons (IConst (c, _) `$ t1 `$ t2) =
   1.114 +                if c = cons'
   1.115 +                then SOME (t1, t2)
   1.116 +                else NONE
   1.117 +            | dest_cons _ = NONE;
   1.118 +          val (ts, t') = Code_Thingol.unfoldr dest_cons t;
   1.119 +        in case t'
   1.120 +         of IConst (c, _) => if c = nil' then SOME ts else NONE
   1.121 +          | _ => NONE
   1.122 +        end
   1.123 +    | _ => NONE
   1.124 +
   1.125 +fun decode_char naming (IConst (c1, _), IConst (c2, _)) = (case map_filter
   1.126 +  (Code_Thingol.lookup_const naming)[@{const_name Nibble0}, @{const_name Nibble1},
   1.127 +   @{const_name Nibble2}, @{const_name Nibble3},
   1.128 +   @{const_name Nibble4}, @{const_name Nibble5},
   1.129 +   @{const_name Nibble6}, @{const_name Nibble7},
   1.130 +   @{const_name Nibble8}, @{const_name Nibble9},
   1.131 +   @{const_name NibbleA}, @{const_name NibbleB},
   1.132 +   @{const_name NibbleC}, @{const_name NibbleD},
   1.133 +   @{const_name NibbleE}, @{const_name NibbleF}]
   1.134 +   of nibbles' as [_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _] => let
   1.135 +          fun idx c = find_index (curry (op =) c) nibbles';
   1.136 +          fun decode ~1 _ = NONE
   1.137 +            | decode _ ~1 = NONE
   1.138 +            | decode n m = SOME (chr (n * 16 + m));
   1.139 +        in decode (idx c1) (idx c2) end
   1.140 +    | _ => NONE)
   1.141 + | decode_char _ _ = NONE
   1.142 +   
   1.143 +fun implode_string naming mk_char mk_string ts = case
   1.144 +  Code_Thingol.lookup_const naming @{const_name Char}
   1.145 +   of SOME char' => let
   1.146 +        fun implode_char (IConst (c, _) `$ t1 `$ t2) =
   1.147 +              if c = char' then decode_char naming (t1, t2) else NONE
   1.148 +          | implode_char _ = NONE;
   1.149 +        val ts' = map implode_char ts;
   1.150 +      in if forall is_some ts'
   1.151 +        then (SOME o Code_Printer.str o mk_string o implode o map_filter I) ts'
   1.152 +        else NONE
   1.153 +      end
   1.154 +    | _ => NONE;
   1.155 +
   1.156 +fun default_list (target_fxy, target_cons) pr fxy t1 t2 =
   1.157 +  Code_Printer.brackify_infix (target_fxy, Code_Printer.R) fxy [
   1.158 +    pr (Code_Printer.INFX (target_fxy, Code_Printer.X)) t1,
   1.159 +    Code_Printer.str target_cons,
   1.160 +    pr (Code_Printer.INFX (target_fxy, Code_Printer.R)) t2
   1.161 +  ];
   1.162 +
   1.163 +fun pretty_list literals =
   1.164 +  let
   1.165 +    val mk_list = Code_Printer.literal_list literals;
   1.166 +    fun pretty pr naming thm vars fxy [(t1, _), (t2, _)] =
   1.167 +      case Option.map (cons t1) (implode_list naming t2)
   1.168 +       of SOME ts => mk_list (map (pr vars Code_Printer.NOBR) ts)
   1.169 +        | NONE => default_list (Code_Printer.infix_cons literals) (pr vars) fxy t1 t2;
   1.170 +  in (2, pretty) end;
   1.171 +
   1.172 +fun pretty_list_string literals =
   1.173 +  let
   1.174 +    val mk_list = Code_Printer.literal_list literals;
   1.175 +    val mk_char = Code_Printer.literal_char literals;
   1.176 +    val mk_string = Code_Printer.literal_string literals;
   1.177 +    fun pretty pr naming thm vars fxy [(t1, _), (t2, _)] =
   1.178 +      case Option.map (cons t1) (implode_list naming t2)
   1.179 +       of SOME ts => (case implode_string naming mk_char mk_string ts
   1.180 +           of SOME p => p
   1.181 +            | NONE => mk_list (map (pr vars Code_Printer.NOBR) ts))
   1.182 +        | NONE => default_list (Code_Printer.infix_cons literals) (pr vars) fxy t1 t2;
   1.183 +  in (2, pretty) end;
   1.184 +
   1.185 +fun pretty_char literals =
   1.186 +  let
   1.187 +    val mk_char = Code_Printer.literal_char literals;
   1.188 +    fun pretty _ naming thm _ _ [(t1, _), (t2, _)] =
   1.189 +      case decode_char naming (t1, t2)
   1.190 +       of SOME c => (Code_Printer.str o mk_char) c
   1.191 +        | NONE => Code_Printer.nerror thm "Illegal character expression";
   1.192 +  in (2, pretty) end;
   1.193 +
   1.194 +fun pretty_message literals =
   1.195 +  let
   1.196 +    val mk_char = Code_Printer.literal_char literals;
   1.197 +    val mk_string = Code_Printer.literal_string literals;
   1.198 +    fun pretty _ naming thm _ _ [(t, _)] =
   1.199 +      case implode_list naming t
   1.200 +       of SOME ts => (case implode_string naming mk_char mk_string ts
   1.201 +           of SOME p => p
   1.202 +            | NONE => Code_Printer.nerror thm "Illegal message expression")
   1.203 +        | NONE => Code_Printer.nerror thm "Illegal message expression";
   1.204 +  in (1, pretty) end;
   1.205 +
   1.206 +in
   1.207 +
   1.208 +fun add_literal_list target thy =
   1.209 +  let
   1.210 +    val pr = pretty_list (Code_Target.the_literals thy target);
   1.211 +  in
   1.212 +    thy
   1.213 +    |> Code_Target.add_syntax_const target @{const_name Cons} (SOME pr)
   1.214 +  end;
   1.215 +
   1.216 +fun add_literal_list_string target thy =
   1.217 +  let
   1.218 +    val pr = pretty_list_string (Code_Target.the_literals thy target);
   1.219 +  in
   1.220 +    thy
   1.221 +    |> Code_Target.add_syntax_const target @{const_name Cons} (SOME pr)
   1.222 +  end;
   1.223 +
   1.224 +fun add_literal_char target thy =
   1.225 +  let
   1.226 +    val pr = pretty_char (Code_Target.the_literals thy target);
   1.227 +  in
   1.228 +    thy
   1.229 +    |> Code_Target.add_syntax_const target @{const_name Char} (SOME pr)
   1.230 +  end;
   1.231 +
   1.232 +fun add_literal_message str target thy =
   1.233 +  let
   1.234 +    val pr = pretty_message (Code_Target.the_literals thy target);
   1.235 +  in
   1.236 +    thy
   1.237 +    |> Code_Target.add_syntax_const target str (SOME pr)
   1.238 +  end;
   1.239 +
   1.240 +end;
   1.241 +*}
   1.242 +
   1.243 +setup {*
   1.244 +  fold (fn target => add_literal_list target) ["SML", "OCaml", "Haskell"]
   1.245 +*}
   1.246 +
   1.247 +code_type message_string
   1.248 +  (SML "string")
   1.249 +  (OCaml "string")
   1.250 +  (Haskell "String")
   1.251 +
   1.252 +setup {*
   1.253 +  fold (fn target => add_literal_message @{const_name STR} target)
   1.254 +    ["SML", "OCaml", "Haskell"]
   1.255 +*}
   1.256 +
   1.257 +code_instance message_string :: eq
   1.258 +  (Haskell -)
   1.259 +
   1.260 +code_const "eq_class.eq \<Colon> message_string \<Rightarrow> message_string \<Rightarrow> bool"
   1.261 +  (SML "!((_ : string) = _)")
   1.262 +  (OCaml "!((_ : string) = _)")
   1.263 +  (Haskell infixl 4 "==")
   1.264 +
   1.265 +code_reserved SML string
   1.266 +code_reserved OCaml string
   1.267 +
   1.268 +
   1.269 +types_code
   1.270 +  "char" ("string")
   1.271 +attach (term_of) {*
   1.272 +val term_of_char = HOLogic.mk_char o ord;
   1.273 +*}
   1.274 +attach (test) {*
   1.275 +fun gen_char i =
   1.276 +  let val j = random_range (ord "a") (Int.min (ord "a" + i, ord "z"))
   1.277 +  in (chr j, fn () => HOLogic.mk_char j) end;
   1.278 +*}
   1.279 +
   1.280 +setup {*
   1.281 +let
   1.282 +
   1.283 +fun char_codegen thy defs dep thyname b t gr =
   1.284 +  let
   1.285 +    val i = HOLogic.dest_char t;
   1.286 +    val (_, gr') = Codegen.invoke_tycodegen thy defs dep thyname false
   1.287 +      (fastype_of t) gr;
   1.288 +  in SOME (Codegen.str (ML_Syntax.print_string (chr i)), gr')
   1.289 +  end handle TERM _ => NONE;
   1.290 +
   1.291 +in Codegen.add_codegen "char_codegen" char_codegen end
   1.292 +*}
   1.293 +
   1.294 +end
   1.295 \ No newline at end of file