src/HOL/Tools/string_syntax.ML
author blanchet
Thu, 28 Aug 2014 07:34:23 +0200
changeset 58067 a7a0af643499
parent 55108 0b7a0c1fdf7e
child 58822 90a5e981af3e
permissions -rw-r--r--
tuned terminology
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
21759
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
     1
(*  Title:      HOL/Tools/string_syntax.ML
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
     2
    Author:     Makarius
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
     3
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
     4
Concrete syntax for hex chars and strings.
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
     5
*)
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
     6
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
     7
signature STRING_SYNTAX =
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
     8
sig
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
     9
  val setup: theory -> theory
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    10
end;
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    11
35123
e286d5df187a modernized structures;
wenzelm
parents: 35115
diff changeset
    12
structure String_Syntax: STRING_SYNTAX =
21759
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    13
struct
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    14
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    15
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    16
(* nibble *)
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    17
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    18
val mk_nib =
42290
b1f544c84040 discontinued special treatment of structure Lexicon;
wenzelm
parents: 42248
diff changeset
    19
  Ast.Constant o Lexicon.mark_const o
35123
e286d5df187a modernized structures;
wenzelm
parents: 35115
diff changeset
    20
    fst o Term.dest_Const o HOLogic.mk_nibble;
21759
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    21
42224
578a51fae383 discontinued special treatment of structure Ast: no pervasive content, no inclusion in structure Syntax;
wenzelm
parents: 40627
diff changeset
    22
fun dest_nib (Ast.Constant s) =
42290
b1f544c84040 discontinued special treatment of structure Lexicon;
wenzelm
parents: 42248
diff changeset
    23
  (case try Lexicon.unmark_const s of
35256
b73ae1a8fe7e adapted to authentic syntax;
wenzelm
parents: 35123
diff changeset
    24
    NONE => raise Match
b73ae1a8fe7e adapted to authentic syntax;
wenzelm
parents: 35123
diff changeset
    25
  | SOME c => (HOLogic.dest_nibble (Const (c, HOLogic.nibbleT)) handle TERM _ => raise Match));
21759
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    26
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    27
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    28
(* char *)
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    29
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    30
fun mk_char s =
55015
e33c5bd729ff added \<newline> symbol, which is used for char/string literals in HOL;
wenzelm
parents: 52143
diff changeset
    31
  let
e33c5bd729ff added \<newline> symbol, which is used for char/string literals in HOL;
wenzelm
parents: 52143
diff changeset
    32
    val c =
e33c5bd729ff added \<newline> symbol, which is used for char/string literals in HOL;
wenzelm
parents: 52143
diff changeset
    33
      if Symbol.is_ascii s then ord s
e33c5bd729ff added \<newline> symbol, which is used for char/string literals in HOL;
wenzelm
parents: 52143
diff changeset
    34
      else if s = "\<newline>" then 10
e33c5bd729ff added \<newline> symbol, which is used for char/string literals in HOL;
wenzelm
parents: 52143
diff changeset
    35
      else error ("Bad character: " ^ quote s);
e33c5bd729ff added \<newline> symbol, which is used for char/string literals in HOL;
wenzelm
parents: 52143
diff changeset
    36
  in Ast.Appl [Ast.Constant @{const_syntax Char}, mk_nib (c div 16), mk_nib (c mod 16)] end;
21759
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    37
40627
becf5d5187cc renamed raw "explode" function to "raw_explode" to emphasize its meaning;
wenzelm
parents: 35363
diff changeset
    38
val specials = raw_explode "\\\"`'";
21759
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    39
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    40
fun dest_chr c1 c2 =
55108
0b7a0c1fdf7e inner syntax token language allows regular quoted strings;
wenzelm
parents: 55015
diff changeset
    41
  let val s = chr (dest_nib c1 * 16 + dest_nib c2) in
0b7a0c1fdf7e inner syntax token language allows regular quoted strings;
wenzelm
parents: 55015
diff changeset
    42
    if not (member (op =) specials s) andalso Symbol.is_ascii s andalso Symbol.is_printable s
0b7a0c1fdf7e inner syntax token language allows regular quoted strings;
wenzelm
parents: 55015
diff changeset
    43
    then s
0b7a0c1fdf7e inner syntax token language allows regular quoted strings;
wenzelm
parents: 55015
diff changeset
    44
    else if s = "\n" then "\<newline>"
55015
e33c5bd729ff added \<newline> symbol, which is used for char/string literals in HOL;
wenzelm
parents: 52143
diff changeset
    45
    else raise Match
21759
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    46
  end;
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    47
42224
578a51fae383 discontinued special treatment of structure Ast: no pervasive content, no inclusion in structure Syntax;
wenzelm
parents: 40627
diff changeset
    48
fun dest_char (Ast.Appl [Ast.Constant @{const_syntax Char}, c1, c2]) = dest_chr c1 c2
21759
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    49
  | dest_char _ = raise Match;
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    50
55108
0b7a0c1fdf7e inner syntax token language allows regular quoted strings;
wenzelm
parents: 55015
diff changeset
    51
fun syntax_string ss =
42290
b1f544c84040 discontinued special treatment of structure Lexicon;
wenzelm
parents: 42248
diff changeset
    52
  Ast.Appl [Ast.Constant @{syntax_const "_inner_string"},
55108
0b7a0c1fdf7e inner syntax token language allows regular quoted strings;
wenzelm
parents: 55015
diff changeset
    53
    Ast.Variable (Lexicon.implode_str ss)];
21759
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    54
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    55
46483
10a9c31a22b4 renamed "xstr" to "str_token";
wenzelm
parents: 45490
diff changeset
    56
fun char_ast_tr [Ast.Variable str] =
55108
0b7a0c1fdf7e inner syntax token language allows regular quoted strings;
wenzelm
parents: 55015
diff changeset
    57
      (case Lexicon.explode_str (str, Position.none) of
0b7a0c1fdf7e inner syntax token language allows regular quoted strings;
wenzelm
parents: 55015
diff changeset
    58
        [(s, _)] => mk_char s
46483
10a9c31a22b4 renamed "xstr" to "str_token";
wenzelm
parents: 45490
diff changeset
    59
      | _ => error ("Single character expected: " ^ str))
45490
20c8c0cca555 inner syntax positions for string literals;
wenzelm
parents: 42290
diff changeset
    60
  | char_ast_tr [Ast.Appl [Ast.Constant @{syntax_const "_constrain"}, ast1, ast2]] =
20c8c0cca555 inner syntax positions for string literals;
wenzelm
parents: 42290
diff changeset
    61
      Ast.Appl [Ast.Constant @{syntax_const "_constrain"}, char_ast_tr [ast1], ast2]
42224
578a51fae383 discontinued special treatment of structure Ast: no pervasive content, no inclusion in structure Syntax;
wenzelm
parents: 40627
diff changeset
    62
  | char_ast_tr asts = raise Ast.AST ("char_ast_tr", asts);
21759
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    63
35115
446c5063e4fd modernized translations;
wenzelm
parents: 31048
diff changeset
    64
fun char_ast_tr' [c1, c2] =
42224
578a51fae383 discontinued special treatment of structure Ast: no pervasive content, no inclusion in structure Syntax;
wenzelm
parents: 40627
diff changeset
    65
      Ast.Appl [Ast.Constant @{syntax_const "_Char"}, syntax_string [dest_chr c1 c2]]
21759
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    66
  | char_ast_tr' _ = raise Match;
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    67
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    68
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    69
(* string *)
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    70
42224
578a51fae383 discontinued special treatment of structure Ast: no pervasive content, no inclusion in structure Syntax;
wenzelm
parents: 40627
diff changeset
    71
fun mk_string [] = Ast.Constant @{const_syntax Nil}
55108
0b7a0c1fdf7e inner syntax token language allows regular quoted strings;
wenzelm
parents: 55015
diff changeset
    72
  | mk_string (s :: ss) =
0b7a0c1fdf7e inner syntax token language allows regular quoted strings;
wenzelm
parents: 55015
diff changeset
    73
      Ast.Appl [Ast.Constant @{const_syntax Cons}, mk_char s, mk_string ss];
21759
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    74
46483
10a9c31a22b4 renamed "xstr" to "str_token";
wenzelm
parents: 45490
diff changeset
    75
fun string_ast_tr [Ast.Variable str] =
55108
0b7a0c1fdf7e inner syntax token language allows regular quoted strings;
wenzelm
parents: 55015
diff changeset
    76
      (case Lexicon.explode_str (str, Position.none) of
42224
578a51fae383 discontinued special treatment of structure Ast: no pervasive content, no inclusion in structure Syntax;
wenzelm
parents: 40627
diff changeset
    77
        [] =>
578a51fae383 discontinued special treatment of structure Ast: no pervasive content, no inclusion in structure Syntax;
wenzelm
parents: 40627
diff changeset
    78
          Ast.Appl
42248
04bffad68aa4 discontinued old-style Syntax.constrainC;
wenzelm
parents: 42224
diff changeset
    79
            [Ast.Constant @{syntax_const "_constrain"},
42224
578a51fae383 discontinued special treatment of structure Ast: no pervasive content, no inclusion in structure Syntax;
wenzelm
parents: 40627
diff changeset
    80
              Ast.Constant @{const_syntax Nil}, Ast.Constant @{type_syntax string}]
55108
0b7a0c1fdf7e inner syntax token language allows regular quoted strings;
wenzelm
parents: 55015
diff changeset
    81
      | ss => mk_string (map Symbol_Pos.symbol ss))
45490
20c8c0cca555 inner syntax positions for string literals;
wenzelm
parents: 42290
diff changeset
    82
  | string_ast_tr [Ast.Appl [Ast.Constant @{syntax_const "_constrain"}, ast1, ast2]] =
20c8c0cca555 inner syntax positions for string literals;
wenzelm
parents: 42290
diff changeset
    83
      Ast.Appl [Ast.Constant @{syntax_const "_constrain"}, string_ast_tr [ast1], ast2]
42224
578a51fae383 discontinued special treatment of structure Ast: no pervasive content, no inclusion in structure Syntax;
wenzelm
parents: 40627
diff changeset
    84
  | string_ast_tr asts = raise Ast.AST ("string_tr", asts);
21759
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    85
35115
446c5063e4fd modernized translations;
wenzelm
parents: 31048
diff changeset
    86
fun list_ast_tr' [args] =
42224
578a51fae383 discontinued special treatment of structure Ast: no pervasive content, no inclusion in structure Syntax;
wenzelm
parents: 40627
diff changeset
    87
      Ast.Appl [Ast.Constant @{syntax_const "_String"},
578a51fae383 discontinued special treatment of structure Ast: no pervasive content, no inclusion in structure Syntax;
wenzelm
parents: 40627
diff changeset
    88
        syntax_string (map dest_char (Ast.unfold_ast @{syntax_const "_args"} args))]
51160
599ff65b85e2 systematic conversions between nat and nibble/char;
haftmann
parents: 46483
diff changeset
    89
  | list_ast_tr' _ = raise Match;
21759
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    90
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    91
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    92
(* theory setup *)
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    93
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
    94
val setup =
52143
36ffe23b25f8 syntax translations always depend on context;
wenzelm
parents: 51160
diff changeset
    95
  Sign.parse_ast_translation
36ffe23b25f8 syntax translations always depend on context;
wenzelm
parents: 51160
diff changeset
    96
   [(@{syntax_const "_Char"}, K char_ast_tr),
36ffe23b25f8 syntax translations always depend on context;
wenzelm
parents: 51160
diff changeset
    97
    (@{syntax_const "_String"}, K string_ast_tr)] #>
36ffe23b25f8 syntax translations always depend on context;
wenzelm
parents: 51160
diff changeset
    98
  Sign.print_ast_translation
36ffe23b25f8 syntax translations always depend on context;
wenzelm
parents: 51160
diff changeset
    99
   [(@{const_syntax Char}, K char_ast_tr'),
36ffe23b25f8 syntax translations always depend on context;
wenzelm
parents: 51160
diff changeset
   100
    (@{syntax_const "_list"}, K list_ast_tr')];
21759
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
   101
f4b20360751f Concrete syntax for hex chars and strings.
wenzelm
parents:
diff changeset
   102
end;