src/HOL/Tools/string_syntax.ML
author wenzelm
Fri, 04 Jan 2019 23:22:53 +0100
changeset 69593 3dda49e08b9d
parent 68939 bcce5967e10e
permissions -rw-r--r--
isabelle update -u control_cartouches;
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
21759
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
     1
(*  Title:      HOL/Tools/string_syntax.ML
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
     2
    Author:     Makarius
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
     3
68028
1f9f973eed2a proper datatype for 8-bit characters
haftmann
parents: 62597
diff changeset
     4
Concrete syntax for characters and strings.
21759
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
     5
*)
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
     6
68028
1f9f973eed2a proper datatype for 8-bit characters
haftmann
parents: 62597
diff changeset
     7
signature STRING_SYNTAX = sig
1f9f973eed2a proper datatype for 8-bit characters
haftmann
parents: 62597
diff changeset
     8
  val hex: int -> string
1f9f973eed2a proper datatype for 8-bit characters
haftmann
parents: 62597
diff changeset
     9
  val mk_bits_syntax: int -> int -> term list
1f9f973eed2a proper datatype for 8-bit characters
haftmann
parents: 62597
diff changeset
    10
  val dest_bits_syntax: term list -> int
68939
bcce5967e10e more explicit notion of ord value for HOL characters
haftmann
parents: 68099
diff changeset
    11
  val ascii_ord_of: string -> int
68028
1f9f973eed2a proper datatype for 8-bit characters
haftmann
parents: 62597
diff changeset
    12
  val plain_strings_of: string -> string list
1f9f973eed2a proper datatype for 8-bit characters
haftmann
parents: 62597
diff changeset
    13
  datatype character = Char of string | Ord of int
1f9f973eed2a proper datatype for 8-bit characters
haftmann
parents: 62597
diff changeset
    14
  val classify_character: int -> character
1f9f973eed2a proper datatype for 8-bit characters
haftmann
parents: 62597
diff changeset
    15
end
1f9f973eed2a proper datatype for 8-bit characters
haftmann
parents: 62597
diff changeset
    16
1f9f973eed2a proper datatype for 8-bit characters
haftmann
parents: 62597
diff changeset
    17
structure String_Syntax: STRING_SYNTAX =
21759
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    18
struct
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    19
62597
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    20
(* numeral *)
21759
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    21
62597
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    22
fun hex_digit n = if n = 10 then "A"
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    23
  else if n = 11 then "B"
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    24
  else if n = 12 then "C"
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    25
  else if n = 13 then "D"
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    26
  else if n = 14 then "E"
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    27
  else if n = 15 then "F"
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    28
  else string_of_int n;
21759
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    29
62597
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    30
fun hex_prefix ms = "0x" ^ implode (replicate (2 - length ms) "0" @ ms);
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    31
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    32
fun hex n = hex_prefix (map hex_digit (radixpand (16, n)));
21759
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    33
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    34
68028
1f9f973eed2a proper datatype for 8-bit characters
haftmann
parents: 62597
diff changeset
    35
(* booleans as bits *)
1f9f973eed2a proper datatype for 8-bit characters
haftmann
parents: 62597
diff changeset
    36
1f9f973eed2a proper datatype for 8-bit characters
haftmann
parents: 62597
diff changeset
    37
fun mk_bit_syntax b =
69593
3dda49e08b9d isabelle update -u control_cartouches;
wenzelm
parents: 68939
diff changeset
    38
  Syntax.const (if b = 1 then \<^const_syntax>\<open>True\<close> else \<^const_syntax>\<open>False\<close>);
68028
1f9f973eed2a proper datatype for 8-bit characters
haftmann
parents: 62597
diff changeset
    39
1f9f973eed2a proper datatype for 8-bit characters
haftmann
parents: 62597
diff changeset
    40
fun mk_bits_syntax len = map mk_bit_syntax o Integer.radicify 2 len;
1f9f973eed2a proper datatype for 8-bit characters
haftmann
parents: 62597
diff changeset
    41
69593
3dda49e08b9d isabelle update -u control_cartouches;
wenzelm
parents: 68939
diff changeset
    42
fun dest_bit_syntax (Const (\<^const_syntax>\<open>True\<close>, _)) = 1 
3dda49e08b9d isabelle update -u control_cartouches;
wenzelm
parents: 68939
diff changeset
    43
  | dest_bit_syntax (Const (\<^const_syntax>\<open>False\<close>, _)) = 0
68028
1f9f973eed2a proper datatype for 8-bit characters
haftmann
parents: 62597
diff changeset
    44
  | dest_bit_syntax _ = raise Match;
1f9f973eed2a proper datatype for 8-bit characters
haftmann
parents: 62597
diff changeset
    45
1f9f973eed2a proper datatype for 8-bit characters
haftmann
parents: 62597
diff changeset
    46
val dest_bits_syntax = Integer.eval_radix 2 o map dest_bit_syntax;
1f9f973eed2a proper datatype for 8-bit characters
haftmann
parents: 62597
diff changeset
    47
1f9f973eed2a proper datatype for 8-bit characters
haftmann
parents: 62597
diff changeset
    48
21759
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    49
(* char *)
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    50
68939
bcce5967e10e more explicit notion of ord value for HOL characters
haftmann
parents: 68099
diff changeset
    51
fun ascii_ord_of c =
bcce5967e10e more explicit notion of ord value for HOL characters
haftmann
parents: 68099
diff changeset
    52
  if Symbol.is_ascii c then ord c
bcce5967e10e more explicit notion of ord value for HOL characters
haftmann
parents: 68099
diff changeset
    53
  else if c = "\<newline>" then 10
bcce5967e10e more explicit notion of ord value for HOL characters
haftmann
parents: 68099
diff changeset
    54
  else error ("Bad character: " ^ quote c);
bcce5967e10e more explicit notion of ord value for HOL characters
haftmann
parents: 68099
diff changeset
    55
68028
1f9f973eed2a proper datatype for 8-bit characters
haftmann
parents: 62597
diff changeset
    56
fun mk_char_syntax i =
69593
3dda49e08b9d isabelle update -u control_cartouches;
wenzelm
parents: 68939
diff changeset
    57
  list_comb (Syntax.const \<^const_syntax>\<open>Char\<close>, mk_bits_syntax 8 i);
62597
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    58
68028
1f9f973eed2a proper datatype for 8-bit characters
haftmann
parents: 62597
diff changeset
    59
fun plain_strings_of str =
62597
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    60
  map fst (Lexicon.explode_str (str, Position.none));
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    61
68028
1f9f973eed2a proper datatype for 8-bit characters
haftmann
parents: 62597
diff changeset
    62
datatype character = Char of string | Ord of int;
21759
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    63
40627
becf5d5187cc renamed raw "explode" function to "raw_explode" to emphasize its meaning;
wenzelm
parents: 35363
diff changeset
    64
val specials = raw_explode "\\\"`'";
21759
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    65
68028
1f9f973eed2a proper datatype for 8-bit characters
haftmann
parents: 62597
diff changeset
    66
fun classify_character i =
1f9f973eed2a proper datatype for 8-bit characters
haftmann
parents: 62597
diff changeset
    67
  let
1f9f973eed2a proper datatype for 8-bit characters
haftmann
parents: 62597
diff changeset
    68
    val c = chr i
1f9f973eed2a proper datatype for 8-bit characters
haftmann
parents: 62597
diff changeset
    69
  in
1f9f973eed2a proper datatype for 8-bit characters
haftmann
parents: 62597
diff changeset
    70
    if not (member (op =) specials c) andalso Symbol.is_ascii c andalso Symbol.is_printable c
1f9f973eed2a proper datatype for 8-bit characters
haftmann
parents: 62597
diff changeset
    71
    then Char c
1f9f973eed2a proper datatype for 8-bit characters
haftmann
parents: 62597
diff changeset
    72
    else if c = "\n"
1f9f973eed2a proper datatype for 8-bit characters
haftmann
parents: 62597
diff changeset
    73
    then Char "\<newline>"
1f9f973eed2a proper datatype for 8-bit characters
haftmann
parents: 62597
diff changeset
    74
    else Ord i
1f9f973eed2a proper datatype for 8-bit characters
haftmann
parents: 62597
diff changeset
    75
  end;
1f9f973eed2a proper datatype for 8-bit characters
haftmann
parents: 62597
diff changeset
    76
1f9f973eed2a proper datatype for 8-bit characters
haftmann
parents: 62597
diff changeset
    77
fun dest_char_syntax b0 b1 b2 b3 b4 b5 b6 b7 =
1f9f973eed2a proper datatype for 8-bit characters
haftmann
parents: 62597
diff changeset
    78
  classify_character (dest_bits_syntax [b0, b1, b2, b3, b4, b5, b6, b7])
21759
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    79
69593
3dda49e08b9d isabelle update -u control_cartouches;
wenzelm
parents: 68939
diff changeset
    80
fun dest_char_ast (Ast.Appl [Ast.Constant \<^syntax_const>\<open>_Char\<close>, Ast.Constant s]) =
68028
1f9f973eed2a proper datatype for 8-bit characters
haftmann
parents: 62597
diff changeset
    81
      plain_strings_of s
62597
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    82
  | dest_char_ast _ = raise Match;
21759
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    83
69593
3dda49e08b9d isabelle update -u control_cartouches;
wenzelm
parents: 68939
diff changeset
    84
fun char_tr [(c as Const (\<^syntax_const>\<open>_constrain\<close>, _)) $ t $ u] =
62597
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    85
      c $ char_tr [t] $ u
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    86
  | char_tr [Free (str, _)] =
68028
1f9f973eed2a proper datatype for 8-bit characters
haftmann
parents: 62597
diff changeset
    87
      (case plain_strings_of str of
68939
bcce5967e10e more explicit notion of ord value for HOL characters
haftmann
parents: 68099
diff changeset
    88
        [c] => mk_char_syntax (ascii_ord_of c)
62597
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    89
      | _ => error ("Single character expected: " ^ str))
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    90
  | char_tr ts = raise TERM ("char_tr", ts);
21759
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    91
69593
3dda49e08b9d isabelle update -u control_cartouches;
wenzelm
parents: 68939
diff changeset
    92
fun char_ord_tr [(c as Const (\<^syntax_const>\<open>_constrain\<close>, _)) $ t $ u] =
62597
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    93
      c $ char_ord_tr [t] $ u
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    94
  | char_ord_tr [Const (num, _)] =
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    95
      (mk_char_syntax o #value o Lexicon.read_num) num
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    96
  | char_ord_tr ts = raise TERM ("char_ord_tr", ts);
21759
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    97
68028
1f9f973eed2a proper datatype for 8-bit characters
haftmann
parents: 62597
diff changeset
    98
fun char_tr' [b1, b2, b3, b4, b5, b6, b7, b8] =
1f9f973eed2a proper datatype for 8-bit characters
haftmann
parents: 62597
diff changeset
    99
      (case dest_char_syntax b1 b2 b3 b4 b5 b6 b7 b8 of
69593
3dda49e08b9d isabelle update -u control_cartouches;
wenzelm
parents: 68939
diff changeset
   100
        Char s => Syntax.const \<^syntax_const>\<open>_Char\<close> $
62597
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
   101
          Syntax.const (Lexicon.implode_str [s])
69593
3dda49e08b9d isabelle update -u control_cartouches;
wenzelm
parents: 68939
diff changeset
   102
      | Ord n => Syntax.const \<^syntax_const>\<open>_Char_ord\<close> $
68028
1f9f973eed2a proper datatype for 8-bit characters
haftmann
parents: 62597
diff changeset
   103
          Syntax.free (hex n))
62597
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
   104
  | char_tr' _ = raise Match;
21759
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
   105
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
   106
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
   107
(* string *)
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
   108
69593
3dda49e08b9d isabelle update -u control_cartouches;
wenzelm
parents: 68939
diff changeset
   109
fun mk_string_syntax [] = Syntax.const \<^const_syntax>\<open>Nil\<close>
62597
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
   110
  | mk_string_syntax (c :: cs) =
69593
3dda49e08b9d isabelle update -u control_cartouches;
wenzelm
parents: 68939
diff changeset
   111
      Syntax.const \<^const_syntax>\<open>Cons\<close> $ mk_char_syntax (ascii_ord_of c)
68939
bcce5967e10e more explicit notion of ord value for HOL characters
haftmann
parents: 68099
diff changeset
   112
        $ mk_string_syntax cs;
21759
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
   113
62597
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
   114
fun mk_string_ast ss =
69593
3dda49e08b9d isabelle update -u control_cartouches;
wenzelm
parents: 68939
diff changeset
   115
  Ast.Appl [Ast.Constant \<^syntax_const>\<open>_inner_string\<close>,
62597
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
   116
    Ast.Variable (Lexicon.implode_str ss)];
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
   117
69593
3dda49e08b9d isabelle update -u control_cartouches;
wenzelm
parents: 68939
diff changeset
   118
fun string_tr [(c as Const (\<^syntax_const>\<open>_constrain\<close>, _)) $ t $ u] =
62597
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
   119
      c $ string_tr [t] $ u
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
   120
  | string_tr [Free (str, _)] =
68028
1f9f973eed2a proper datatype for 8-bit characters
haftmann
parents: 62597
diff changeset
   121
      mk_string_syntax (plain_strings_of str)
68099
haftmann
parents: 68028
diff changeset
   122
  | string_tr ts = raise TERM ("string_tr", ts);
21759
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
   123
35115
446c5063e4fd modernized translations;
wenzelm
parents: 31048
diff changeset
   124
fun list_ast_tr' [args] =
69593
3dda49e08b9d isabelle update -u control_cartouches;
wenzelm
parents: 68939
diff changeset
   125
      Ast.Appl [Ast.Constant \<^syntax_const>\<open>_String\<close>,
3dda49e08b9d isabelle update -u control_cartouches;
wenzelm
parents: 68939
diff changeset
   126
        (mk_string_ast o maps dest_char_ast o Ast.unfold_ast \<^syntax_const>\<open>_args\<close>) args]
51160
599ff65b85e2 systematic conversions between nat and nibble/char;
haftmann
parents: 46483
diff changeset
   127
  | list_ast_tr' _ = raise Match;
21759
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
   128
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
   129
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
   130
(* theory setup *)
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
   131
58822
90a5e981af3e modernized setup;
wenzelm
parents: 55108
diff changeset
   132
val _ =
90a5e981af3e modernized setup;
wenzelm
parents: 55108
diff changeset
   133
  Theory.setup
62597
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
   134
   (Sign.parse_translation
69593
3dda49e08b9d isabelle update -u control_cartouches;
wenzelm
parents: 68939
diff changeset
   135
     [(\<^syntax_const>\<open>_Char\<close>, K char_tr),
3dda49e08b9d isabelle update -u control_cartouches;
wenzelm
parents: 68939
diff changeset
   136
      (\<^syntax_const>\<open>_Char_ord\<close>, K char_ord_tr),
3dda49e08b9d isabelle update -u control_cartouches;
wenzelm
parents: 68939
diff changeset
   137
      (\<^syntax_const>\<open>_String\<close>, K string_tr)] #>
62597
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
   138
    Sign.print_translation
69593
3dda49e08b9d isabelle update -u control_cartouches;
wenzelm
parents: 68939
diff changeset
   139
     [(\<^const_syntax>\<open>Char\<close>, K char_tr')] #>
58822
90a5e981af3e modernized setup;
wenzelm
parents: 55108
diff changeset
   140
    Sign.print_ast_translation
69593
3dda49e08b9d isabelle update -u control_cartouches;
wenzelm
parents: 68939
diff changeset
   141
     [(\<^syntax_const>\<open>_list\<close>, K list_ast_tr')]);
21759
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
   142
68028
1f9f973eed2a proper datatype for 8-bit characters
haftmann
parents: 62597
diff changeset
   143
end