src/HOL/Tools/string_syntax.ML
author haftmann
Sat, 12 Mar 2016 22:04:52 +0100
changeset 62597 b3f2b8c906a6
parent 58822 90a5e981af3e
child 68028 1f9f973eed2a
permissions -rw-r--r--
model characters directly as range 0..255 * * * operate on syntax terms rather than asts
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
21759
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
     1
(*  Title:      HOL/Tools/string_syntax.ML
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
     2
    Author:     Makarius
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
     3
62597
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
     4
Concrete syntax for chars and strings.
21759
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
     5
*)
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
     6
58822
90a5e981af3e modernized setup;
wenzelm
parents: 55108
diff changeset
     7
structure String_Syntax: sig end =
21759
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
     8
struct
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
     9
62597
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    10
(* numeral *)
21759
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    11
62597
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    12
fun hex_digit n = if n = 10 then "A"
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    13
  else if n = 11 then "B"
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    14
  else if n = 12 then "C"
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    15
  else if n = 13 then "D"
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    16
  else if n = 14 then "E"
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    17
  else if n = 15 then "F"
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    18
  else string_of_int n;
21759
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    19
62597
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    20
fun hex_prefix ms = "0x" ^ implode (replicate (2 - length ms) "0" @ ms);
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    21
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    22
fun hex n = hex_prefix (map hex_digit (radixpand (16, n)));
21759
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    23
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    24
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    25
(* char *)
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    26
62597
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    27
fun mk_char_syntax n =
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    28
  if n = 0 then Syntax.const @{const_name Groups.zero}
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    29
  else Syntax.const @{const_syntax Char} $ Numeral.mk_num_syntax n;
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    30
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    31
fun mk_char_syntax' c =
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    32
  if Symbol.is_ascii c then mk_char_syntax (ord c)
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    33
  else if c = "\<newline>" then mk_char_syntax 10
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    34
  else error ("Bad character: " ^ quote c);
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    35
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    36
fun plain_string_of str =
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    37
  map fst (Lexicon.explode_str (str, Position.none));
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    38
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    39
datatype character = Char of string | Ord of int | Unprintable;
21759
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    40
40627
becf5d5187cc renamed raw "explode" function to "raw_explode" to emphasize its meaning;
wenzelm
parents: 35363
diff changeset
    41
val specials = raw_explode "\\\"`'";
21759
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    42
62597
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    43
fun dest_char_syntax c =
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    44
  case try Numeral.dest_num_syntax c of
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    45
    SOME n =>
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    46
      if n < 256 then
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    47
        let
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    48
          val s = chr n
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    49
        in
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    50
          if not (member (op =) specials s) andalso Symbol.is_ascii s andalso Symbol.is_printable s
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    51
          then Char s
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    52
          else if s = "\n" then Char "\<newline>"
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    53
          else Ord n
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    54
        end
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    55
      else Unprintable
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    56
  | NONE => Unprintable;
21759
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    57
62597
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    58
fun dest_char_ast (Ast.Appl [Ast.Constant @{syntax_const "_Char"}, Ast.Constant s]) =
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    59
      plain_string_of s
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    60
  | dest_char_ast _ = raise Match;
21759
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    61
62597
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    62
fun char_tr [(c as Const (@{syntax_const "_constrain"}, _)) $ t $ u] =
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    63
      c $ char_tr [t] $ u
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    64
  | char_tr [Free (str, _)] =
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    65
      (case plain_string_of str of
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    66
        [c] => mk_char_syntax' c
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    67
      | _ => error ("Single character expected: " ^ str))
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    68
  | char_tr ts = raise TERM ("char_tr", ts);
21759
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    69
62597
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    70
fun char_ord_tr [(c as Const (@{syntax_const "_constrain"}, _)) $ t $ u] =
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    71
      c $ char_ord_tr [t] $ u
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    72
  | char_ord_tr [Const (num, _)] =
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    73
      (mk_char_syntax o #value o Lexicon.read_num) num
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    74
  | char_ord_tr ts = raise TERM ("char_ord_tr", ts);
21759
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    75
62597
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    76
fun char_tr' [t] = (case dest_char_syntax t of
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    77
        Char s => Syntax.const @{syntax_const "_Char"} $
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    78
          Syntax.const (Lexicon.implode_str [s])
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    79
      | Ord n => 
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    80
          if n = 0
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    81
          then Syntax.const @{const_syntax Groups.zero}
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    82
          else Syntax.const @{syntax_const "_Char_ord"} $
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    83
            Syntax.free (hex n)
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    84
      | _ => raise Match)
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    85
  | char_tr' _ = raise Match;
21759
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    86
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    87
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    88
(* string *)
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    89
62597
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    90
fun mk_string_syntax [] = Syntax.const @{const_syntax Nil}
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    91
  | mk_string_syntax (c :: cs) =
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    92
      Syntax.const @{const_syntax Cons} $ mk_char_syntax' c $ mk_string_syntax cs;
21759
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    93
62597
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    94
fun mk_string_ast ss =
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    95
  Ast.Appl [Ast.Constant @{syntax_const "_inner_string"},
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    96
    Ast.Variable (Lexicon.implode_str ss)];
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    97
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    98
fun string_tr [(c as Const (@{syntax_const "_constrain"}, _)) $ t $ u] =
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
    99
      c $ string_tr [t] $ u
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
   100
  | string_tr [Free (str, _)] =
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
   101
      mk_string_syntax (plain_string_of str)
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
   102
  | string_tr ts = raise TERM ("char_tr", ts);
21759
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
   103
35115
446c5063e4fd modernized translations;
wenzelm
parents: 31048
diff changeset
   104
fun list_ast_tr' [args] =
42224
578a51fae383 discontinued special treatment of structure Ast: no pervasive content, no inclusion in structure Syntax;
wenzelm
parents: 40627
diff changeset
   105
      Ast.Appl [Ast.Constant @{syntax_const "_String"},
62597
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
   106
        (mk_string_ast o maps dest_char_ast o Ast.unfold_ast @{syntax_const "_args"}) args]
51160
599ff65b85e2 systematic conversions between nat and nibble/char;
haftmann
parents: 46483
diff changeset
   107
  | list_ast_tr' _ = raise Match;
21759
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
   108
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
   109
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
   110
(* theory setup *)
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
   111
58822
90a5e981af3e modernized setup;
wenzelm
parents: 55108
diff changeset
   112
val _ =
90a5e981af3e modernized setup;
wenzelm
parents: 55108
diff changeset
   113
  Theory.setup
62597
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
   114
   (Sign.parse_translation
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
   115
     [(@{syntax_const "_Char"}, K char_tr),
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
   116
      (@{syntax_const "_Char_ord"}, K char_ord_tr),
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
   117
      (@{syntax_const "_String"}, K string_tr)] #>
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
   118
    Sign.print_translation
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
   119
     [(@{const_syntax Char}, K char_tr')] #>
58822
90a5e981af3e modernized setup;
wenzelm
parents: 55108
diff changeset
   120
    Sign.print_ast_translation
62597
b3f2b8c906a6 model characters directly as range 0..255
haftmann
parents: 58822
diff changeset
   121
     [(@{syntax_const "_list"}, K list_ast_tr')]);
21759
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
   122
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
   123
end;