| author | wenzelm | 
| Sun, 18 Aug 2024 15:08:32 +0200 | |
| changeset 80719 | 636458836fca | 
| parent 69593 | 3dda49e08b9d | 
| child 80795 | 5e38e8b34eb3 | 
| permissions | -rw-r--r-- | 
| 21759 | 1 | (* Title: HOL/Tools/string_syntax.ML | 
| 2 | Author: Makarius | |
| 3 | ||
| 68028 | 4 | Concrete syntax for characters and strings. | 
| 21759 | 5 | *) | 
| 6 | ||
| 68028 | 7 | signature STRING_SYNTAX = sig | 
| 8 | val hex: int -> string | |
| 9 | val mk_bits_syntax: int -> int -> term list | |
| 10 | val dest_bits_syntax: term list -> int | |
| 68939 
bcce5967e10e
more explicit notion of ord value for HOL characters
 haftmann parents: 
68099diff
changeset | 11 | val ascii_ord_of: string -> int | 
| 68028 | 12 | val plain_strings_of: string -> string list | 
| 13 | datatype character = Char of string | Ord of int | |
| 14 | val classify_character: int -> character | |
| 15 | end | |
| 16 | ||
| 17 | structure String_Syntax: STRING_SYNTAX = | |
| 21759 | 18 | struct | 
| 19 | ||
| 62597 | 20 | (* numeral *) | 
| 21759 | 21 | |
| 62597 | 22 | fun hex_digit n = if n = 10 then "A" | 
| 23 | else if n = 11 then "B" | |
| 24 | else if n = 12 then "C" | |
| 25 | else if n = 13 then "D" | |
| 26 | else if n = 14 then "E" | |
| 27 | else if n = 15 then "F" | |
| 28 | else string_of_int n; | |
| 21759 | 29 | |
| 62597 | 30 | fun hex_prefix ms = "0x" ^ implode (replicate (2 - length ms) "0" @ ms); | 
| 31 | ||
| 32 | fun hex n = hex_prefix (map hex_digit (radixpand (16, n))); | |
| 21759 | 33 | |
| 34 | ||
| 68028 | 35 | (* booleans as bits *) | 
| 36 | ||
| 37 | fun mk_bit_syntax b = | |
| 69593 | 38 | Syntax.const (if b = 1 then \<^const_syntax>\<open>True\<close> else \<^const_syntax>\<open>False\<close>); | 
| 68028 | 39 | |
| 40 | fun mk_bits_syntax len = map mk_bit_syntax o Integer.radicify 2 len; | |
| 41 | ||
| 69593 | 42 | fun dest_bit_syntax (Const (\<^const_syntax>\<open>True\<close>, _)) = 1 | 
| 43 | | dest_bit_syntax (Const (\<^const_syntax>\<open>False\<close>, _)) = 0 | |
| 68028 | 44 | | dest_bit_syntax _ = raise Match; | 
| 45 | ||
| 46 | val dest_bits_syntax = Integer.eval_radix 2 o map dest_bit_syntax; | |
| 47 | ||
| 48 | ||
| 21759 | 49 | (* char *) | 
| 50 | ||
| 68939 
bcce5967e10e
more explicit notion of ord value for HOL characters
 haftmann parents: 
68099diff
changeset | 51 | fun ascii_ord_of c = | 
| 
bcce5967e10e
more explicit notion of ord value for HOL characters
 haftmann parents: 
68099diff
changeset | 52 | if Symbol.is_ascii c then ord c | 
| 
bcce5967e10e
more explicit notion of ord value for HOL characters
 haftmann parents: 
68099diff
changeset | 53 | else if c = "\<newline>" then 10 | 
| 
bcce5967e10e
more explicit notion of ord value for HOL characters
 haftmann parents: 
68099diff
changeset | 54 |   else error ("Bad character: " ^ quote c);
 | 
| 
bcce5967e10e
more explicit notion of ord value for HOL characters
 haftmann parents: 
68099diff
changeset | 55 | |
| 68028 | 56 | fun mk_char_syntax i = | 
| 69593 | 57 | list_comb (Syntax.const \<^const_syntax>\<open>Char\<close>, mk_bits_syntax 8 i); | 
| 62597 | 58 | |
| 68028 | 59 | fun plain_strings_of str = | 
| 62597 | 60 | map fst (Lexicon.explode_str (str, Position.none)); | 
| 61 | ||
| 68028 | 62 | datatype character = Char of string | Ord of int; | 
| 21759 | 63 | |
| 40627 
becf5d5187cc
renamed raw "explode" function to "raw_explode" to emphasize its meaning;
 wenzelm parents: 
35363diff
changeset | 64 | val specials = raw_explode "\\\"`'"; | 
| 21759 | 65 | |
| 68028 | 66 | fun classify_character i = | 
| 67 | let | |
| 68 | val c = chr i | |
| 69 | in | |
| 70 | if not (member (op =) specials c) andalso Symbol.is_ascii c andalso Symbol.is_printable c | |
| 71 | then Char c | |
| 72 | else if c = "\n" | |
| 73 | then Char "\<newline>" | |
| 74 | else Ord i | |
| 75 | end; | |
| 76 | ||
| 77 | fun dest_char_syntax b0 b1 b2 b3 b4 b5 b6 b7 = | |
| 78 | classify_character (dest_bits_syntax [b0, b1, b2, b3, b4, b5, b6, b7]) | |
| 21759 | 79 | |
| 69593 | 80 | fun dest_char_ast (Ast.Appl [Ast.Constant \<^syntax_const>\<open>_Char\<close>, Ast.Constant s]) = | 
| 68028 | 81 | plain_strings_of s | 
| 62597 | 82 | | dest_char_ast _ = raise Match; | 
| 21759 | 83 | |
| 69593 | 84 | fun char_tr [(c as Const (\<^syntax_const>\<open>_constrain\<close>, _)) $ t $ u] = | 
| 62597 | 85 | c $ char_tr [t] $ u | 
| 86 | | char_tr [Free (str, _)] = | |
| 68028 | 87 | (case plain_strings_of str of | 
| 68939 
bcce5967e10e
more explicit notion of ord value for HOL characters
 haftmann parents: 
68099diff
changeset | 88 | [c] => mk_char_syntax (ascii_ord_of c) | 
| 62597 | 89 |       | _ => error ("Single character expected: " ^ str))
 | 
| 90 |   | char_tr ts = raise TERM ("char_tr", ts);
 | |
| 21759 | 91 | |
| 69593 | 92 | fun char_ord_tr [(c as Const (\<^syntax_const>\<open>_constrain\<close>, _)) $ t $ u] = | 
| 62597 | 93 | c $ char_ord_tr [t] $ u | 
| 94 | | char_ord_tr [Const (num, _)] = | |
| 95 | (mk_char_syntax o #value o Lexicon.read_num) num | |
| 96 |   | char_ord_tr ts = raise TERM ("char_ord_tr", ts);
 | |
| 21759 | 97 | |
| 68028 | 98 | fun char_tr' [b1, b2, b3, b4, b5, b6, b7, b8] = | 
| 99 | (case dest_char_syntax b1 b2 b3 b4 b5 b6 b7 b8 of | |
| 69593 | 100 | Char s => Syntax.const \<^syntax_const>\<open>_Char\<close> $ | 
| 62597 | 101 | Syntax.const (Lexicon.implode_str [s]) | 
| 69593 | 102 | | Ord n => Syntax.const \<^syntax_const>\<open>_Char_ord\<close> $ | 
| 68028 | 103 | Syntax.free (hex n)) | 
| 62597 | 104 | | char_tr' _ = raise Match; | 
| 21759 | 105 | |
| 106 | ||
| 107 | (* string *) | |
| 108 | ||
| 69593 | 109 | fun mk_string_syntax [] = Syntax.const \<^const_syntax>\<open>Nil\<close> | 
| 62597 | 110 | | mk_string_syntax (c :: cs) = | 
| 69593 | 111 | Syntax.const \<^const_syntax>\<open>Cons\<close> $ mk_char_syntax (ascii_ord_of c) | 
| 68939 
bcce5967e10e
more explicit notion of ord value for HOL characters
 haftmann parents: 
68099diff
changeset | 112 | $ mk_string_syntax cs; | 
| 21759 | 113 | |
| 62597 | 114 | fun mk_string_ast ss = | 
| 69593 | 115 | Ast.Appl [Ast.Constant \<^syntax_const>\<open>_inner_string\<close>, | 
| 62597 | 116 | Ast.Variable (Lexicon.implode_str ss)]; | 
| 117 | ||
| 69593 | 118 | fun string_tr [(c as Const (\<^syntax_const>\<open>_constrain\<close>, _)) $ t $ u] = | 
| 62597 | 119 | c $ string_tr [t] $ u | 
| 120 | | string_tr [Free (str, _)] = | |
| 68028 | 121 | mk_string_syntax (plain_strings_of str) | 
| 68099 | 122 |   | string_tr ts = raise TERM ("string_tr", ts);
 | 
| 21759 | 123 | |
| 35115 | 124 | fun list_ast_tr' [args] = | 
| 69593 | 125 | Ast.Appl [Ast.Constant \<^syntax_const>\<open>_String\<close>, | 
| 126 | (mk_string_ast o maps dest_char_ast o Ast.unfold_ast \<^syntax_const>\<open>_args\<close>) args] | |
| 51160 
599ff65b85e2
systematic conversions between nat and nibble/char;
 haftmann parents: 
46483diff
changeset | 127 | | list_ast_tr' _ = raise Match; | 
| 21759 | 128 | |
| 129 | ||
| 130 | (* theory setup *) | |
| 131 | ||
| 58822 | 132 | val _ = | 
| 133 | Theory.setup | |
| 62597 | 134 | (Sign.parse_translation | 
| 69593 | 135 | [(\<^syntax_const>\<open>_Char\<close>, K char_tr), | 
| 136 | (\<^syntax_const>\<open>_Char_ord\<close>, K char_ord_tr), | |
| 137 | (\<^syntax_const>\<open>_String\<close>, K string_tr)] #> | |
| 62597 | 138 | Sign.print_translation | 
| 69593 | 139 | [(\<^const_syntax>\<open>Char\<close>, K char_tr')] #> | 
| 58822 | 140 | Sign.print_ast_translation | 
| 69593 | 141 | [(\<^syntax_const>\<open>_list\<close>, K list_ast_tr')]); | 
| 21759 | 142 | |
| 68028 | 143 | end |