| author | wenzelm | 
| Wed, 31 Oct 2018 15:50:45 +0100 | |
| changeset 69215 | ab94035ba6ea | 
| parent 68939 | bcce5967e10e | 
| child 69593 | 3dda49e08b9d | 
| permissions | -rw-r--r-- | 
| 21759 | 1  | 
(* Title: HOL/Tools/string_syntax.ML  | 
2  | 
Author: Makarius  | 
|
3  | 
||
| 68028 | 4  | 
Concrete syntax for characters and strings.  | 
| 21759 | 5  | 
*)  | 
6  | 
||
| 68028 | 7  | 
signature STRING_SYNTAX = sig  | 
8  | 
val hex: int -> string  | 
|
9  | 
val mk_bits_syntax: int -> int -> term list  | 
|
10  | 
val dest_bits_syntax: term list -> int  | 
|
| 
68939
 
bcce5967e10e
more explicit notion of ord value for HOL characters
 
haftmann 
parents: 
68099 
diff
changeset
 | 
11  | 
val ascii_ord_of: string -> int  | 
| 68028 | 12  | 
val plain_strings_of: string -> string list  | 
13  | 
datatype character = Char of string | Ord of int  | 
|
14  | 
val classify_character: int -> character  | 
|
15  | 
end  | 
|
16  | 
||
17  | 
structure String_Syntax: STRING_SYNTAX =  | 
|
| 21759 | 18  | 
struct  | 
19  | 
||
| 62597 | 20  | 
(* numeral *)  | 
| 21759 | 21  | 
|
| 62597 | 22  | 
fun hex_digit n = if n = 10 then "A"  | 
23  | 
else if n = 11 then "B"  | 
|
24  | 
else if n = 12 then "C"  | 
|
25  | 
else if n = 13 then "D"  | 
|
26  | 
else if n = 14 then "E"  | 
|
27  | 
else if n = 15 then "F"  | 
|
28  | 
else string_of_int n;  | 
|
| 21759 | 29  | 
|
| 62597 | 30  | 
fun hex_prefix ms = "0x" ^ implode (replicate (2 - length ms) "0" @ ms);  | 
31  | 
||
32  | 
fun hex n = hex_prefix (map hex_digit (radixpand (16, n)));  | 
|
| 21759 | 33  | 
|
34  | 
||
| 68028 | 35  | 
(* booleans as bits *)  | 
36  | 
||
37  | 
fun mk_bit_syntax b =  | 
|
38  | 
  Syntax.const (if b = 1 then @{const_syntax True} else @{const_syntax False});
 | 
|
39  | 
||
40  | 
fun mk_bits_syntax len = map mk_bit_syntax o Integer.radicify 2 len;  | 
|
41  | 
||
42  | 
fun dest_bit_syntax (Const (@{const_syntax True}, _)) = 1 
 | 
|
43  | 
  | dest_bit_syntax (Const (@{const_syntax False}, _)) = 0
 | 
|
44  | 
| dest_bit_syntax _ = raise Match;  | 
|
45  | 
||
46  | 
val dest_bits_syntax = Integer.eval_radix 2 o map dest_bit_syntax;  | 
|
47  | 
||
48  | 
||
| 21759 | 49  | 
(* char *)  | 
50  | 
||
| 
68939
 
bcce5967e10e
more explicit notion of ord value for HOL characters
 
haftmann 
parents: 
68099 
diff
changeset
 | 
51  | 
fun ascii_ord_of c =  | 
| 
 
bcce5967e10e
more explicit notion of ord value for HOL characters
 
haftmann 
parents: 
68099 
diff
changeset
 | 
52  | 
if Symbol.is_ascii c then ord c  | 
| 
 
bcce5967e10e
more explicit notion of ord value for HOL characters
 
haftmann 
parents: 
68099 
diff
changeset
 | 
53  | 
else if c = "\<newline>" then 10  | 
| 
 
bcce5967e10e
more explicit notion of ord value for HOL characters
 
haftmann 
parents: 
68099 
diff
changeset
 | 
54  | 
  else error ("Bad character: " ^ quote c);
 | 
| 
 
bcce5967e10e
more explicit notion of ord value for HOL characters
 
haftmann 
parents: 
68099 
diff
changeset
 | 
55  | 
|
| 68028 | 56  | 
fun mk_char_syntax i =  | 
57  | 
  list_comb (Syntax.const @{const_syntax Char}, mk_bits_syntax 8 i);
 | 
|
| 62597 | 58  | 
|
| 68028 | 59  | 
fun plain_strings_of str =  | 
| 62597 | 60  | 
map fst (Lexicon.explode_str (str, Position.none));  | 
61  | 
||
| 68028 | 62  | 
datatype character = Char of string | Ord of int;  | 
| 21759 | 63  | 
|
| 
40627
 
becf5d5187cc
renamed raw "explode" function to "raw_explode" to emphasize its meaning;
 
wenzelm 
parents: 
35363 
diff
changeset
 | 
64  | 
val specials = raw_explode "\\\"`'";  | 
| 21759 | 65  | 
|
| 68028 | 66  | 
fun classify_character i =  | 
67  | 
let  | 
|
68  | 
val c = chr i  | 
|
69  | 
in  | 
|
70  | 
if not (member (op =) specials c) andalso Symbol.is_ascii c andalso Symbol.is_printable c  | 
|
71  | 
then Char c  | 
|
72  | 
else if c = "\n"  | 
|
73  | 
then Char "\<newline>"  | 
|
74  | 
else Ord i  | 
|
75  | 
end;  | 
|
76  | 
||
77  | 
fun dest_char_syntax b0 b1 b2 b3 b4 b5 b6 b7 =  | 
|
78  | 
classify_character (dest_bits_syntax [b0, b1, b2, b3, b4, b5, b6, b7])  | 
|
| 21759 | 79  | 
|
| 62597 | 80  | 
fun dest_char_ast (Ast.Appl [Ast.Constant @{syntax_const "_Char"}, Ast.Constant s]) =
 | 
| 68028 | 81  | 
plain_strings_of s  | 
| 62597 | 82  | 
| dest_char_ast _ = raise Match;  | 
| 21759 | 83  | 
|
| 62597 | 84  | 
fun char_tr [(c as Const (@{syntax_const "_constrain"}, _)) $ t $ u] =
 | 
85  | 
c $ char_tr [t] $ u  | 
|
86  | 
| char_tr [Free (str, _)] =  | 
|
| 68028 | 87  | 
(case plain_strings_of str of  | 
| 
68939
 
bcce5967e10e
more explicit notion of ord value for HOL characters
 
haftmann 
parents: 
68099 
diff
changeset
 | 
88  | 
[c] => mk_char_syntax (ascii_ord_of c)  | 
| 62597 | 89  | 
      | _ => error ("Single character expected: " ^ str))
 | 
90  | 
  | char_tr ts = raise TERM ("char_tr", ts);
 | 
|
| 21759 | 91  | 
|
| 62597 | 92  | 
fun char_ord_tr [(c as Const (@{syntax_const "_constrain"}, _)) $ t $ u] =
 | 
93  | 
c $ char_ord_tr [t] $ u  | 
|
94  | 
| char_ord_tr [Const (num, _)] =  | 
|
95  | 
(mk_char_syntax o #value o Lexicon.read_num) num  | 
|
96  | 
  | char_ord_tr ts = raise TERM ("char_ord_tr", ts);
 | 
|
| 21759 | 97  | 
|
| 68028 | 98  | 
fun char_tr' [b1, b2, b3, b4, b5, b6, b7, b8] =  | 
99  | 
(case dest_char_syntax b1 b2 b3 b4 b5 b6 b7 b8 of  | 
|
| 62597 | 100  | 
        Char s => Syntax.const @{syntax_const "_Char"} $
 | 
101  | 
Syntax.const (Lexicon.implode_str [s])  | 
|
| 68028 | 102  | 
      | Ord n => Syntax.const @{syntax_const "_Char_ord"} $
 | 
103  | 
Syntax.free (hex n))  | 
|
| 62597 | 104  | 
| char_tr' _ = raise Match;  | 
| 21759 | 105  | 
|
106  | 
||
107  | 
(* string *)  | 
|
108  | 
||
| 62597 | 109  | 
fun mk_string_syntax [] = Syntax.const @{const_syntax Nil}
 | 
110  | 
| mk_string_syntax (c :: cs) =  | 
|
| 
68939
 
bcce5967e10e
more explicit notion of ord value for HOL characters
 
haftmann 
parents: 
68099 
diff
changeset
 | 
111  | 
      Syntax.const @{const_syntax Cons} $ mk_char_syntax (ascii_ord_of c)
 | 
| 
 
bcce5967e10e
more explicit notion of ord value for HOL characters
 
haftmann 
parents: 
68099 
diff
changeset
 | 
112  | 
$ mk_string_syntax cs;  | 
| 21759 | 113  | 
|
| 62597 | 114  | 
fun mk_string_ast ss =  | 
115  | 
  Ast.Appl [Ast.Constant @{syntax_const "_inner_string"},
 | 
|
116  | 
Ast.Variable (Lexicon.implode_str ss)];  | 
|
117  | 
||
118  | 
fun string_tr [(c as Const (@{syntax_const "_constrain"}, _)) $ t $ u] =
 | 
|
119  | 
c $ string_tr [t] $ u  | 
|
120  | 
| string_tr [Free (str, _)] =  | 
|
| 68028 | 121  | 
mk_string_syntax (plain_strings_of str)  | 
| 68099 | 122  | 
  | string_tr ts = raise TERM ("string_tr", ts);
 | 
| 21759 | 123  | 
|
| 35115 | 124  | 
fun list_ast_tr' [args] =  | 
| 
42224
 
578a51fae383
discontinued special treatment of structure Ast: no pervasive content, no inclusion in structure Syntax;
 
wenzelm 
parents: 
40627 
diff
changeset
 | 
125  | 
      Ast.Appl [Ast.Constant @{syntax_const "_String"},
 | 
| 62597 | 126  | 
        (mk_string_ast o maps dest_char_ast o Ast.unfold_ast @{syntax_const "_args"}) args]
 | 
| 
51160
 
599ff65b85e2
systematic conversions between nat and nibble/char;
 
haftmann 
parents: 
46483 
diff
changeset
 | 
127  | 
| list_ast_tr' _ = raise Match;  | 
| 21759 | 128  | 
|
129  | 
||
130  | 
(* theory setup *)  | 
|
131  | 
||
| 58822 | 132  | 
val _ =  | 
133  | 
Theory.setup  | 
|
| 62597 | 134  | 
(Sign.parse_translation  | 
135  | 
     [(@{syntax_const "_Char"}, K char_tr),
 | 
|
136  | 
      (@{syntax_const "_Char_ord"}, K char_ord_tr),
 | 
|
137  | 
      (@{syntax_const "_String"}, K string_tr)] #>
 | 
|
138  | 
Sign.print_translation  | 
|
139  | 
     [(@{const_syntax Char}, K char_tr')] #>
 | 
|
| 58822 | 140  | 
Sign.print_ast_translation  | 
| 62597 | 141  | 
     [(@{syntax_const "_list"}, K list_ast_tr')]);
 | 
| 21759 | 142  | 
|
| 68028 | 143  | 
end  |