| author | paulson <lp15@cam.ac.uk> | 
| Wed, 26 Oct 2022 17:22:12 +0100 | |
| changeset 76376 | 934d4aed8497 | 
| parent 73198 | a9eaf8c3b728 | 
| child 77846 | 5ba68d3bd741 | 
| permissions | -rw-r--r-- | 
| 18 | 1 | (* Title: Pure/Syntax/lexicon.ML | 
| 2 | Author: Tobias Nipkow and Markus Wenzel, TU Muenchen | |
| 0 | 3 | |
| 4703 | 4 | Lexer for the inner Isabelle syntax (terms and types). | 
| 18 | 5 | *) | 
| 0 | 6 | |
| 42290 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
42264diff
changeset | 7 | signature LEXICON = | 
| 4247 
9bba9251bb4d
added implode_xstr: string list -> string, explode_xstr: string -> string list;
 wenzelm parents: 
3828diff
changeset | 8 | sig | 
| 42476 | 9 | structure Syntax: | 
| 10 | sig | |
| 11 | val const: string -> term | |
| 12 | val free: string -> term | |
| 13 | val var: indexname -> term | |
| 14 | end | |
| 67426 | 15 | val scan_id: Symbol_Pos.T list scanner | 
| 16 | val scan_longid: Symbol_Pos.T list scanner | |
| 17 | val scan_tid: Symbol_Pos.T list scanner | |
| 18 | val scan_hex: Symbol_Pos.T list scanner | |
| 19 | val scan_bin: Symbol_Pos.T list scanner | |
| 20 | val scan_var: Symbol_Pos.T list scanner | |
| 21 | val scan_tvar: Symbol_Pos.T list scanner | |
| 50239 | 22 | val is_tid: string -> bool | 
| 27800 
df444ddeff56
datatype token: maintain range, tuned representation;
 wenzelm parents: 
27773diff
changeset | 23 | datatype token_kind = | 
| 67548 | 24 | Literal | Ident | Long_Ident | Var | Type_Ident | Type_Var | Num | Float | Str | | 
| 69042 | 25 | String | Cartouche | Space | Comment of Comment.kind | Dummy | EOF | 
| 67554 | 26 | eqtype token | 
| 67412 | 27 | val kind_of_token: token -> token_kind | 
| 27806 | 28 | val str_of_token: token -> string | 
| 29 | val pos_of_token: token -> Position.T | |
| 67551 | 30 | val end_pos_of_token: token -> Position.T | 
| 67555 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 wenzelm parents: 
67554diff
changeset | 31 | val is_proper: token -> bool | 
| 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 wenzelm parents: 
67554diff
changeset | 32 | val dummy: token | 
| 67552 | 33 | val literal: string -> token | 
| 34 | val is_literal: token -> bool | |
| 70586 | 35 | val tokens_match_ord: token ord | 
| 27800 
df444ddeff56
datatype token: maintain range, tuned representation;
 wenzelm parents: 
27773diff
changeset | 36 | val mk_eof: Position.T -> token | 
| 
df444ddeff56
datatype token: maintain range, tuned representation;
 wenzelm parents: 
27773diff
changeset | 37 | val eof: token | 
| 
df444ddeff56
datatype token: maintain range, tuned representation;
 wenzelm parents: 
27773diff
changeset | 38 | val is_eof: token -> bool | 
| 
df444ddeff56
datatype token: maintain range, tuned representation;
 wenzelm parents: 
27773diff
changeset | 39 | val stopper: token Scan.stopper | 
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
164diff
changeset | 40 | val terminals: string list | 
| 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
164diff
changeset | 41 | val is_terminal: string -> bool | 
| 67556 | 42 | val get_terminal: string -> token option | 
| 73163 
624c2b98860a
suppress markup for literal tokens with block control symbols, for better PIDE/HTML output (see also d15fe10593ff);
 wenzelm parents: 
70586diff
changeset | 43 | val suppress_literal_markup: string -> bool | 
| 
624c2b98860a
suppress markup for literal tokens with block control symbols, for better PIDE/HTML output (see also d15fe10593ff);
 wenzelm parents: 
70586diff
changeset | 44 | val suppress_markup: token -> bool | 
| 
624c2b98860a
suppress markup for literal tokens with block control symbols, for better PIDE/HTML output (see also d15fe10593ff);
 wenzelm parents: 
70586diff
changeset | 45 | val literal_markup: string -> Markup.T list | 
| 
624c2b98860a
suppress markup for literal tokens with block control symbols, for better PIDE/HTML output (see also d15fe10593ff);
 wenzelm parents: 
70586diff
changeset | 46 | val reports_of_token: token -> Position.report list | 
| 39510 
d9f5f01faa1b
Syntax.read_asts error: report token ranges within message -- no side-effect here;
 wenzelm parents: 
39507diff
changeset | 47 | val reported_token_range: Proof.context -> token -> string | 
| 18 | 48 | val valued_token: token -> bool | 
| 55108 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 wenzelm parents: 
55107diff
changeset | 49 | val implode_string: Symbol.symbol list -> string | 
| 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 wenzelm parents: 
55107diff
changeset | 50 | val explode_string: string * Position.T -> Symbol_Pos.T list | 
| 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 wenzelm parents: 
55107diff
changeset | 51 | val implode_str: Symbol.symbol list -> string | 
| 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 wenzelm parents: 
55107diff
changeset | 52 | val explode_str: string * Position.T -> Symbol_Pos.T list | 
| 30573 | 53 | val tokenize: Scan.lexicon -> bool -> Symbol_Pos.T list -> token list | 
| 42290 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
42264diff
changeset | 54 | val read_indexname: string -> indexname | 
| 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
42264diff
changeset | 55 | val read_var: string -> term | 
| 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
42264diff
changeset | 56 | val read_variable: string -> indexname option | 
| 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
42264diff
changeset | 57 | val read_nat: string -> int option | 
| 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
42264diff
changeset | 58 | val read_int: string -> int option | 
| 58421 | 59 |   val read_num: string -> {radix: int, leading_zeros: int, value: int}
 | 
| 42290 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
42264diff
changeset | 60 |   val read_float: string -> {mant: int, exp: int}
 | 
| 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
42264diff
changeset | 61 | val mark_class: string -> string val unmark_class: string -> string | 
| 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
42264diff
changeset | 62 | val mark_type: string -> string val unmark_type: string -> string | 
| 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
42264diff
changeset | 63 | val mark_const: string -> string val unmark_const: string -> string | 
| 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
42264diff
changeset | 64 | val mark_fixed: string -> string val unmark_fixed: string -> string | 
| 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
42264diff
changeset | 65 | val unmark: | 
| 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
42264diff
changeset | 66 |    {case_class: string -> 'a,
 | 
| 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
42264diff
changeset | 67 | case_type: string -> 'a, | 
| 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
42264diff
changeset | 68 | case_const: string -> 'a, | 
| 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
42264diff
changeset | 69 | case_fixed: string -> 'a, | 
| 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
42264diff
changeset | 70 | case_default: string -> 'a} -> string -> 'a | 
| 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
42264diff
changeset | 71 | val is_marked: string -> bool | 
| 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
42264diff
changeset | 72 | val dummy_type: term | 
| 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
42264diff
changeset | 73 | val fun_type: term | 
| 4247 
9bba9251bb4d
added implode_xstr: string list -> string, explode_xstr: string -> string list;
 wenzelm parents: 
3828diff
changeset | 74 | end; | 
| 0 | 75 | |
| 14679 | 76 | structure Lexicon: LEXICON = | 
| 0 | 77 | struct | 
| 78 | ||
| 69344 | 79 | (** syntactic terms **) | 
| 42476 | 80 | |
| 81 | structure Syntax = | |
| 82 | struct | |
| 83 | ||
| 84 | fun const c = Const (c, dummyT); | |
| 85 | fun free x = Free (x, dummyT); | |
| 86 | fun var xi = Var (xi, dummyT); | |
| 87 | ||
| 88 | end; | |
| 89 | ||
| 90 | ||
| 91 | ||
| 4703 | 92 | (** basic scanners **) | 
| 93 | ||
| 30573 | 94 | open Basic_Symbol_Pos; | 
| 27773 | 95 | |
| 55105 | 96 | val err_prefix = "Inner lexical error: "; | 
| 97 | ||
| 98 | fun !!! msg = Symbol_Pos.!!! (fn () => err_prefix ^ msg); | |
| 27773 | 99 | |
| 50239 | 100 | val scan_id = Symbol_Pos.scan_ident; | 
| 61476 | 101 | val scan_longid = scan_id @@@ Scan.repeats1 ($$$ "." @@@ scan_id); | 
| 27773 | 102 | val scan_tid = $$$ "'" @@@ scan_id; | 
| 4703 | 103 | |
| 40525 
14a2e686bdac
eliminated slightly odd pervasive Symbol_Pos.symbol;
 wenzelm parents: 
40290diff
changeset | 104 | val scan_hex = $$$ "0" @@@ $$$ "x" @@@ Scan.many1 (Symbol.is_ascii_hex o Symbol_Pos.symbol); | 
| 27773 | 105 | val scan_bin = $$$ "0" @@@ $$$ "b" @@@ Scan.many1 (fn (s, _) => s = "0" orelse s = "1"); | 
| 62782 | 106 | val scan_num = scan_hex || scan_bin || Symbol_Pos.scan_nat; | 
| 4703 | 107 | |
| 62782 | 108 | val scan_id_nat = scan_id @@@ Scan.optional ($$$ "." @@@ Symbol_Pos.scan_nat) []; | 
| 27773 | 109 | val scan_var = $$$ "?" @@@ scan_id_nat; | 
| 110 | val scan_tvar = $$$ "?" @@@ $$$ "'" @@@ scan_id_nat; | |
| 4703 | 111 | |
| 50239 | 112 | fun is_tid s = | 
| 113 | (case try (unprefix "'") s of | |
| 114 | SOME s' => Symbol_Pos.is_identifier s' | |
| 115 | | NONE => false); | |
| 116 | ||
| 4703 | 117 | |
| 118 | ||
| 67555 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 wenzelm parents: 
67554diff
changeset | 119 | (** tokens **) | 
| 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 wenzelm parents: 
67554diff
changeset | 120 | |
| 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 wenzelm parents: 
67554diff
changeset | 121 | (* datatype token_kind *) | 
| 0 | 122 | |
| 27800 
df444ddeff56
datatype token: maintain range, tuned representation;
 wenzelm parents: 
27773diff
changeset | 123 | datatype token_kind = | 
| 67548 | 124 | Literal | Ident | Long_Ident | Var | Type_Ident | Type_Var | Num | Float | Str | | 
| 69042 | 125 | String | Cartouche | Space | Comment of Comment.kind | Dummy | EOF; | 
| 27800 
df444ddeff56
datatype token: maintain range, tuned representation;
 wenzelm parents: 
27773diff
changeset | 126 | |
| 67555 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 wenzelm parents: 
67554diff
changeset | 127 | val token_kinds = | 
| 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 wenzelm parents: 
67554diff
changeset | 128 | Vector.fromList | 
| 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 wenzelm parents: 
67554diff
changeset | 129 | [Literal, Ident, Long_Ident, Var, Type_Ident, Type_Var, Num, Float, Str, | 
| 69042 | 130 | String, Cartouche, Space, Comment Comment.Comment, Comment Comment.Cancel, | 
| 131 | Comment Comment.Latex, Dummy, EOF]; | |
| 67539 | 132 | |
| 67555 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 wenzelm parents: 
67554diff
changeset | 133 | fun token_kind i = Vector.sub (token_kinds, i); | 
| 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 wenzelm parents: 
67554diff
changeset | 134 | fun token_kind_index k = #1 (the (Vector.findi (fn (_, k') => k = k') token_kinds)); | 
| 67539 | 135 | |
| 136 | ||
| 67555 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 wenzelm parents: 
67554diff
changeset | 137 | (* datatype token *) | 
| 27800 
df444ddeff56
datatype token: maintain range, tuned representation;
 wenzelm parents: 
27773diff
changeset | 138 | |
| 67555 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 wenzelm parents: 
67554diff
changeset | 139 | datatype token = Token of int * string * Position.range; | 
| 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 wenzelm parents: 
67554diff
changeset | 140 | |
| 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 wenzelm parents: 
67554diff
changeset | 141 | fun index_of_token (Token (i, _, _)) = i; | 
| 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 wenzelm parents: 
67554diff
changeset | 142 | val kind_of_token = index_of_token #> token_kind; | 
| 27806 | 143 | fun str_of_token (Token (_, s, _)) = s; | 
| 144 | fun pos_of_token (Token (_, _, (pos, _))) = pos; | |
| 67551 | 145 | fun end_pos_of_token (Token (_, _, (_, end_pos))) = end_pos; | 
| 27806 | 146 | |
| 67425 
7d4a088dbc0e
clarified modules: uniform notion of formal comments;
 wenzelm parents: 
67413diff
changeset | 147 | val is_proper = kind_of_token #> (fn Space => false | Comment _ => false | _ => true); | 
| 27887 | 148 | |
| 67555 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 wenzelm parents: 
67554diff
changeset | 149 | val dummy = Token (token_kind_index Dummy, "", Position.no_range); | 
| 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 wenzelm parents: 
67554diff
changeset | 150 | |
| 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 wenzelm parents: 
67554diff
changeset | 151 | |
| 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 wenzelm parents: 
67554diff
changeset | 152 | (* literals *) | 
| 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 wenzelm parents: 
67554diff
changeset | 153 | |
| 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 wenzelm parents: 
67554diff
changeset | 154 | val literal_index = token_kind_index Literal; | 
| 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 wenzelm parents: 
67554diff
changeset | 155 | fun literal s = Token (literal_index, s, Position.no_range); | 
| 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 wenzelm parents: 
67554diff
changeset | 156 | fun is_literal tok = index_of_token tok = literal_index; | 
| 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 wenzelm parents: 
67554diff
changeset | 157 | |
| 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 wenzelm parents: 
67554diff
changeset | 158 | fun tokens_match_ord toks = | 
| 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 wenzelm parents: 
67554diff
changeset | 159 | let val is = apply2 index_of_token toks in | 
| 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 wenzelm parents: 
67554diff
changeset | 160 | (case int_ord is of | 
| 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 wenzelm parents: 
67554diff
changeset | 161 | EQUAL => if #1 is = literal_index then fast_string_ord (apply2 str_of_token toks) else EQUAL | 
| 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 wenzelm parents: 
67554diff
changeset | 162 | | ord => ord) | 
| 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 wenzelm parents: 
67554diff
changeset | 163 | end; | 
| 67550 | 164 | |
| 27800 
df444ddeff56
datatype token: maintain range, tuned representation;
 wenzelm parents: 
27773diff
changeset | 165 | |
| 
df444ddeff56
datatype token: maintain range, tuned representation;
 wenzelm parents: 
27773diff
changeset | 166 | (* stopper *) | 
| 
df444ddeff56
datatype token: maintain range, tuned representation;
 wenzelm parents: 
27773diff
changeset | 167 | |
| 67555 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 wenzelm parents: 
67554diff
changeset | 168 | val eof_index = token_kind_index EOF; | 
| 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 wenzelm parents: 
67554diff
changeset | 169 | fun mk_eof pos = Token (eof_index, "", (pos, Position.none)); | 
| 27800 
df444ddeff56
datatype token: maintain range, tuned representation;
 wenzelm parents: 
27773diff
changeset | 170 | val eof = mk_eof Position.none; | 
| 
df444ddeff56
datatype token: maintain range, tuned representation;
 wenzelm parents: 
27773diff
changeset | 171 | |
| 67555 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 wenzelm parents: 
67554diff
changeset | 172 | fun is_eof tok = index_of_token tok = eof_index; | 
| 27800 
df444ddeff56
datatype token: maintain range, tuned representation;
 wenzelm parents: 
27773diff
changeset | 173 | val stopper = Scan.stopper (K eof) is_eof; | 
| 
df444ddeff56
datatype token: maintain range, tuned representation;
 wenzelm parents: 
27773diff
changeset | 174 | |
| 0 | 175 | |
| 67556 | 176 | (* terminal symbols *) | 
| 0 | 177 | |
| 67556 | 178 | val terminal_symbols = | 
| 179 |   [("id", Ident),
 | |
| 180 |    ("longid", Long_Ident),
 | |
| 181 |    ("var", Var),
 | |
| 182 |    ("tid", Type_Ident),
 | |
| 183 |    ("tvar", Type_Var),
 | |
| 184 |    ("num_token", Num),
 | |
| 185 |    ("float_token", Float),
 | |
| 186 |    ("str_token", Str),
 | |
| 187 |    ("string_token", String),
 | |
| 188 |    ("cartouche", Cartouche)]
 | |
| 189 | |> map (apsnd token_kind_index) | |
| 190 | |> Symtab.make; | |
| 29156 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 wenzelm parents: 
28904diff
changeset | 191 | |
| 67556 | 192 | val terminals = Symtab.keys terminal_symbols; | 
| 193 | val is_terminal = Symtab.defined terminal_symbols; | |
| 194 | fun get_terminal s = | |
| 195 | (case Symtab.lookup terminal_symbols s of | |
| 196 | SOME i => SOME (Token (i, s, Position.no_range)) | |
| 197 | | NONE => NONE); | |
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
164diff
changeset | 198 | |
| 0 | 199 | |
| 27887 | 200 | (* markup *) | 
| 201 | ||
| 73198 
a9eaf8c3b728
tuned signature (e.g. see HTML.control_block in Isabelle/Scala);
 wenzelm parents: 
73163diff
changeset | 202 | val suppress_literal_markup = Symbol.has_control_block o Symbol.explode; | 
| 73163 
624c2b98860a
suppress markup for literal tokens with block control symbols, for better PIDE/HTML output (see also d15fe10593ff);
 wenzelm parents: 
70586diff
changeset | 203 | fun suppress_markup tok = is_literal tok andalso suppress_literal_markup (str_of_token tok); | 
| 
624c2b98860a
suppress markup for literal tokens with block control symbols, for better PIDE/HTML output (see also d15fe10593ff);
 wenzelm parents: 
70586diff
changeset | 204 | |
| 49821 
d15fe10593ff
clarified output token markup (see also bc22daeed49e);
 wenzelm parents: 
48992diff
changeset | 205 | fun literal_markup s = | 
| 73163 
624c2b98860a
suppress markup for literal tokens with block control symbols, for better PIDE/HTML output (see also d15fe10593ff);
 wenzelm parents: 
70586diff
changeset | 206 | let val syms = Symbol.explode s in | 
| 73198 
a9eaf8c3b728
tuned signature (e.g. see HTML.control_block in Isabelle/Scala);
 wenzelm parents: 
73163diff
changeset | 207 | if Symbol.has_control_block syms then [] | 
| 73163 
624c2b98860a
suppress markup for literal tokens with block control symbols, for better PIDE/HTML output (see also d15fe10593ff);
 wenzelm parents: 
70586diff
changeset | 208 | else if Symbol.is_ascii_identifier s orelse exists Symbol.is_letter syms | 
| 
624c2b98860a
suppress markup for literal tokens with block control symbols, for better PIDE/HTML output (see also d15fe10593ff);
 wenzelm parents: 
70586diff
changeset | 209 | then [Markup.literal] | 
| 
624c2b98860a
suppress markup for literal tokens with block control symbols, for better PIDE/HTML output (see also d15fe10593ff);
 wenzelm parents: 
70586diff
changeset | 210 | else [Markup.delimiter] | 
| 
624c2b98860a
suppress markup for literal tokens with block control symbols, for better PIDE/HTML output (see also d15fe10593ff);
 wenzelm parents: 
70586diff
changeset | 211 | end; | 
| 49821 
d15fe10593ff
clarified output token markup (see also bc22daeed49e);
 wenzelm parents: 
48992diff
changeset | 212 | |
| 27887 | 213 | val token_kind_markup = | 
| 73163 
624c2b98860a
suppress markup for literal tokens with block control symbols, for better PIDE/HTML output (see also d15fe10593ff);
 wenzelm parents: 
70586diff
changeset | 214 | fn Type_Ident => [Markup.tfree] | 
| 
624c2b98860a
suppress markup for literal tokens with block control symbols, for better PIDE/HTML output (see also d15fe10593ff);
 wenzelm parents: 
70586diff
changeset | 215 | | Type_Var => [Markup.tvar] | 
| 
624c2b98860a
suppress markup for literal tokens with block control symbols, for better PIDE/HTML output (see also d15fe10593ff);
 wenzelm parents: 
70586diff
changeset | 216 | | Num => [Markup.numeral] | 
| 
624c2b98860a
suppress markup for literal tokens with block control symbols, for better PIDE/HTML output (see also d15fe10593ff);
 wenzelm parents: 
70586diff
changeset | 217 | | Float => [Markup.numeral] | 
| 
624c2b98860a
suppress markup for literal tokens with block control symbols, for better PIDE/HTML output (see also d15fe10593ff);
 wenzelm parents: 
70586diff
changeset | 218 | | Str => [Markup.inner_string] | 
| 
624c2b98860a
suppress markup for literal tokens with block control symbols, for better PIDE/HTML output (see also d15fe10593ff);
 wenzelm parents: 
70586diff
changeset | 219 | | String => [Markup.inner_string] | 
| 
624c2b98860a
suppress markup for literal tokens with block control symbols, for better PIDE/HTML output (see also d15fe10593ff);
 wenzelm parents: 
70586diff
changeset | 220 | | Cartouche => [Markup.inner_cartouche] | 
| 
624c2b98860a
suppress markup for literal tokens with block control symbols, for better PIDE/HTML output (see also d15fe10593ff);
 wenzelm parents: 
70586diff
changeset | 221 | | Comment _ => [Markup.comment1] | 
| 
624c2b98860a
suppress markup for literal tokens with block control symbols, for better PIDE/HTML output (see also d15fe10593ff);
 wenzelm parents: 
70586diff
changeset | 222 | | _ => []; | 
| 27887 | 223 | |
| 73163 
624c2b98860a
suppress markup for literal tokens with block control symbols, for better PIDE/HTML output (see also d15fe10593ff);
 wenzelm parents: 
70586diff
changeset | 224 | fun reports_of_token tok = | 
| 67555 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 wenzelm parents: 
67554diff
changeset | 225 | let | 
| 73163 
624c2b98860a
suppress markup for literal tokens with block control symbols, for better PIDE/HTML output (see also d15fe10593ff);
 wenzelm parents: 
70586diff
changeset | 226 | val pos = pos_of_token tok; | 
| 
624c2b98860a
suppress markup for literal tokens with block control symbols, for better PIDE/HTML output (see also d15fe10593ff);
 wenzelm parents: 
70586diff
changeset | 227 | val markups = | 
| 67555 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 wenzelm parents: 
67554diff
changeset | 228 | if is_literal tok then literal_markup (str_of_token tok) | 
| 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 wenzelm parents: 
67554diff
changeset | 229 | else token_kind_markup (kind_of_token tok); | 
| 73163 
624c2b98860a
suppress markup for literal tokens with block control symbols, for better PIDE/HTML output (see also d15fe10593ff);
 wenzelm parents: 
70586diff
changeset | 230 | in map (pair pos) markups end; | 
| 27887 | 231 | |
| 39510 
d9f5f01faa1b
Syntax.read_asts error: report token ranges within message -- no side-effect here;
 wenzelm parents: 
39507diff
changeset | 232 | fun reported_token_range ctxt tok = | 
| 39168 
e3ac771235f7
report token range after inner parse error -- often provides important clues about misunderstanding concerning lexical phase;
 wenzelm parents: 
38474diff
changeset | 233 | if is_proper tok | 
| 50201 
c26369c9eda6
Isabelle-specific implementation of quasi-abstract markup elements -- back to module arrangement before d83797ef0d2d;
 wenzelm parents: 
49821diff
changeset | 234 | then Context_Position.reported_text ctxt (pos_of_token tok) Markup.token_range "" | 
| 39510 
d9f5f01faa1b
Syntax.read_asts error: report token ranges within message -- no side-effect here;
 wenzelm parents: 
39507diff
changeset | 235 | else ""; | 
| 39168 
e3ac771235f7
report token range after inner parse error -- often provides important clues about misunderstanding concerning lexical phase;
 wenzelm parents: 
38474diff
changeset | 236 | |
| 27887 | 237 | |
| 18 | 238 | (* valued_token *) | 
| 0 | 239 | |
| 67555 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 wenzelm parents: 
67554diff
changeset | 240 | fun valued_token tok = | 
| 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 wenzelm parents: 
67554diff
changeset | 241 | not (is_literal tok orelse is_eof tok); | 
| 0 | 242 | |
| 243 | ||
| 55108 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 wenzelm parents: 
55107diff
changeset | 244 | |
| 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 wenzelm parents: 
55107diff
changeset | 245 | (** string literals **) | 
| 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 wenzelm parents: 
55107diff
changeset | 246 | |
| 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 wenzelm parents: 
55107diff
changeset | 247 | fun explode_literal scan_body (str, pos) = | 
| 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 wenzelm parents: 
55107diff
changeset | 248 | (case Scan.read Symbol_Pos.stopper scan_body (Symbol_Pos.explode (str, pos)) of | 
| 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 wenzelm parents: 
55107diff
changeset | 249 | SOME ss => ss | 
| 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 wenzelm parents: 
55107diff
changeset | 250 | | _ => error (err_prefix ^ "malformed string literal " ^ quote str ^ Position.here pos)); | 
| 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 wenzelm parents: 
55107diff
changeset | 251 | |
| 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 wenzelm parents: 
55107diff
changeset | 252 | |
| 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 wenzelm parents: 
55107diff
changeset | 253 | (* string *) | 
| 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 wenzelm parents: 
55107diff
changeset | 254 | |
| 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 wenzelm parents: 
55107diff
changeset | 255 | val scan_string = Scan.trace (Symbol_Pos.scan_string_qq err_prefix) >> #2; | 
| 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 wenzelm parents: 
55107diff
changeset | 256 | val scan_string_body = Symbol_Pos.scan_string_qq err_prefix >> (#1 o #2); | 
| 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 wenzelm parents: 
55107diff
changeset | 257 | |
| 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 wenzelm parents: 
55107diff
changeset | 258 | fun implode_string ss = quote (implode (map (fn "\"" => "\\\"" | s => s) ss)); | 
| 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 wenzelm parents: 
55107diff
changeset | 259 | val explode_string = explode_literal scan_string_body; | 
| 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 wenzelm parents: 
55107diff
changeset | 260 | |
| 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 wenzelm parents: 
55107diff
changeset | 261 | |
| 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 wenzelm parents: 
55107diff
changeset | 262 | (* str *) | 
| 18 | 263 | |
| 14730 | 264 | val scan_chr = | 
| 55107 | 265 | $$ "\\" |-- $$$ "'" || | 
| 40525 
14a2e686bdac
eliminated slightly odd pervasive Symbol_Pos.symbol;
 wenzelm parents: 
40290diff
changeset | 266 | Scan.one | 
| 58854 | 267 | ((fn s => s <> "\\" andalso s <> "'" andalso Symbol.not_eof s) o | 
| 40525 
14a2e686bdac
eliminated slightly odd pervasive Symbol_Pos.symbol;
 wenzelm parents: 
40290diff
changeset | 268 | Symbol_Pos.symbol) >> single || | 
| 55107 | 269 | $$$ "'" --| Scan.ahead (~$$ "'"); | 
| 14730 | 270 | |
| 271 | val scan_str = | |
| 55106 | 272 | Scan.ahead ($$ "'" -- $$ "'") |-- | 
| 273 | !!! "unclosed string literal" | |
| 61476 | 274 | ($$$ "'" @@@ $$$ "'" @@@ Scan.repeats scan_chr @@@ $$$ "'" @@@ $$$ "'"); | 
| 27800 
df444ddeff56
datatype token: maintain range, tuned representation;
 wenzelm parents: 
27773diff
changeset | 275 | |
| 
df444ddeff56
datatype token: maintain range, tuned representation;
 wenzelm parents: 
27773diff
changeset | 276 | val scan_str_body = | 
| 55107 | 277 | Scan.ahead ($$ "'" |-- $$ "'") |-- | 
| 55106 | 278 | !!! "unclosed string literal" | 
| 61476 | 279 | ($$ "'" |-- $$ "'" |-- Scan.repeats scan_chr --| $$ "'" --| $$ "'"); | 
| 14730 | 280 | |
| 55108 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 wenzelm parents: 
55107diff
changeset | 281 | fun implode_str ss = enclose "''" "''" (implode (map (fn "'" => "\\'" | s => s) ss)); | 
| 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 wenzelm parents: 
55107diff
changeset | 282 | val explode_str = explode_literal scan_str_body; | 
| 18 | 283 | |
| 284 | ||
| 27806 | 285 | |
| 18 | 286 | (** tokenize **) | 
| 287 | ||
| 67412 | 288 | val token_leq = op <= o apply2 str_of_token; | 
| 67555 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 wenzelm parents: 
67554diff
changeset | 289 | |
| 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 wenzelm parents: 
67554diff
changeset | 290 | fun token kind = | 
| 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 wenzelm parents: 
67554diff
changeset | 291 | let val i = token_kind_index kind | 
| 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 wenzelm parents: 
67554diff
changeset | 292 | in fn ss => Token (i, Symbol_Pos.content ss, Symbol_Pos.range ss) end; | 
| 27800 
df444ddeff56
datatype token: maintain range, tuned representation;
 wenzelm parents: 
27773diff
changeset | 293 | |
| 
df444ddeff56
datatype token: maintain range, tuned representation;
 wenzelm parents: 
27773diff
changeset | 294 | fun tokenize lex xids syms = | 
| 18 | 295 | let | 
| 296 | val scan_xid = | |
| 55962 
fbd0e768bc8f
special identifier "__" (i.e. empty name with internal suffix) serves as wild-card for completion;
 wenzelm parents: 
55624diff
changeset | 297 | (if xids then $$$ "_" @@@ scan_id || scan_id else scan_id) || | 
| 
fbd0e768bc8f
special identifier "__" (i.e. empty name with internal suffix) serves as wild-card for completion;
 wenzelm parents: 
55624diff
changeset | 298 | $$$ "_" @@@ $$$ "_"; | 
| 18 | 299 | |
| 550 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 300 | val scan_val = | 
| 67548 | 301 | scan_tvar >> token Type_Var || | 
| 302 | scan_var >> token Var || | |
| 303 | scan_tid >> token Type_Ident || | |
| 304 | Symbol_Pos.scan_float >> token Float || | |
| 305 | scan_num >> token Num || | |
| 306 | scan_longid >> token Long_Ident || | |
| 307 | scan_xid >> token Ident; | |
| 18 | 308 | |
| 27800 
df444ddeff56
datatype token: maintain range, tuned representation;
 wenzelm parents: 
27773diff
changeset | 309 | val scan_lit = Scan.literal lex >> token Literal; | 
| 550 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 310 | |
| 67555 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 wenzelm parents: 
67554diff
changeset | 311 | val scan = | 
| 55105 | 312 | Symbol_Pos.scan_cartouche err_prefix >> token Cartouche || | 
| 69891 
def3ec9cdb7e
document markers are formal comments, and may thus occur anywhere in the command-span;
 wenzelm parents: 
69344diff
changeset | 313 | Comment.scan_inner >> (fn (kind, ss) => token (Comment kind) ss) || | 
| 27887 | 314 | Scan.max token_leq scan_lit scan_val || | 
| 67548 | 315 | scan_string >> token String || | 
| 316 | scan_str >> token Str || | |
| 40525 
14a2e686bdac
eliminated slightly odd pervasive Symbol_Pos.symbol;
 wenzelm parents: 
40290diff
changeset | 317 | Scan.many1 (Symbol.is_blank o Symbol_Pos.symbol) >> token Space; | 
| 18 | 318 | in | 
| 67555 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 wenzelm parents: 
67554diff
changeset | 319 | (case Scan.error (Scan.finite Symbol_Pos.stopper (Scan.repeat scan)) syms of | 
| 27887 | 320 | (toks, []) => toks | 
| 55108 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 wenzelm parents: 
55107diff
changeset | 321 | | (_, ss) => | 
| 55624 | 322 |         error ("Inner lexical error" ^ Position.here (#1 (Symbol_Pos.range ss)) ^
 | 
| 323 |           Markup.markup Markup.no_report ("\nat " ^ quote (Symbol_Pos.content ss))))
 | |
| 18 | 324 | end; | 
| 325 | ||
| 326 | ||
| 327 | ||
| 328 | (** scan variables **) | |
| 329 | ||
| 15991 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 330 | (* scan_indexname *) | 
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 331 | |
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 332 | local | 
| 18 | 333 | |
| 27773 | 334 | val scan_vname = | 
| 18 | 335 | let | 
| 15991 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 336 | fun nat n [] = n | 
| 64275 
ac2abc987cf9
accomodate Poly/ML repository version, which treats singleton strings as boxed;
 wenzelm parents: 
62819diff
changeset | 337 | | nat n (c :: cs) = nat (n * 10 + (ord c - Char.ord #"0")) cs; | 
| 18 | 338 | |
| 15991 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 339 | fun idxname cs ds = (implode (rev cs), nat 0 ds); | 
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 340 | fun chop_idx [] ds = idxname [] ds | 
| 62529 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
61476diff
changeset | 341 | | chop_idx (cs as (_ :: "\<^sub>" :: _)) ds = idxname cs ds | 
| 15991 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 342 | | chop_idx (c :: cs) ds = | 
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 343 | if Symbol.is_digit c then chop_idx cs (c :: ds) | 
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 344 | else idxname (c :: cs) ds; | 
| 18 | 345 | |
| 50239 | 346 | val scan = | 
| 347 | (scan_id >> map Symbol_Pos.symbol) -- | |
| 62782 | 348 | Scan.optional ($$ "." |-- Symbol_Pos.scan_nat >> (nat 0 o map Symbol_Pos.symbol)) ~1; | 
| 18 | 349 | in | 
| 27773 | 350 | scan >> | 
| 351 | (fn (cs, ~1) => chop_idx (rev cs) [] | |
| 352 | | (cs, i) => (implode cs, i)) | |
| 18 | 353 | end; | 
| 354 | ||
| 15991 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 355 | in | 
| 18 | 356 | |
| 55107 | 357 | val scan_indexname = $$ "'" |-- scan_vname >> (fn (x, i) => ("'" ^ x, i)) || scan_vname;
 | 
| 15443 
07f78cc82a73
indexname function now parses type variables as well; changed input
 berghofe parents: 
14981diff
changeset | 358 | |
| 15991 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 359 | end; | 
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 360 | |
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 361 | |
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 362 | (* indexname *) | 
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 363 | |
| 20313 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 364 | fun read_indexname s = | 
| 62751 | 365 | (case Scan.read Symbol_Pos.stopper scan_indexname (Symbol_Pos.explode0 s) of | 
| 15531 | 366 | SOME xi => xi | 
| 20313 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 367 |   | _ => error ("Lexical error in variable name: " ^ quote s));
 | 
| 18 | 368 | |
| 369 | ||
| 4703 | 370 | (* read_var *) | 
| 18 | 371 | |
| 4703 | 372 | fun read_var str = | 
| 18 | 373 | let | 
| 374 | val scan = | |
| 55107 | 375 | $$ "?" |-- scan_indexname --| Scan.ahead (Scan.one Symbol_Pos.is_eof) | 
| 42476 | 376 | >> Syntax.var || | 
| 58854 | 377 | Scan.many (Symbol.not_eof o Symbol_Pos.symbol) | 
| 42476 | 378 | >> (Syntax.free o implode o map Symbol_Pos.symbol); | 
| 62751 | 379 | in the (Scan.read Symbol_Pos.stopper scan (Symbol_Pos.explode0 str)) end; | 
| 15991 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 380 | |
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 381 | |
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 382 | (* read_variable *) | 
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 383 | |
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 384 | fun read_variable str = | 
| 55107 | 385 | let val scan = $$ "?" |-- scan_indexname || scan_indexname | 
| 62751 | 386 | in Scan.read Symbol_Pos.stopper scan (Symbol_Pos.explode0 str) end; | 
| 4587 | 387 | |
| 388 | ||
| 20313 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 389 | (* read numbers *) | 
| 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 390 | |
| 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 391 | local | 
| 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 392 | |
| 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 393 | fun nat cs = | 
| 40525 
14a2e686bdac
eliminated slightly odd pervasive Symbol_Pos.symbol;
 wenzelm parents: 
40290diff
changeset | 394 | Option.map (#1 o Library.read_int o map Symbol_Pos.symbol) | 
| 62782 | 395 | (Scan.read Symbol_Pos.stopper Symbol_Pos.scan_nat cs); | 
| 5860 | 396 | |
| 20313 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 397 | in | 
| 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 398 | |
| 62751 | 399 | fun read_nat s = nat (Symbol_Pos.explode0 s); | 
| 20313 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 400 | |
| 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 401 | fun read_int s = | 
| 62751 | 402 | (case Symbol_Pos.explode0 s of | 
| 27773 | 403 |     ("-", _) :: cs => Option.map ~ (nat cs)
 | 
| 20313 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 404 | | cs => nat cs); | 
| 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 405 | |
| 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 406 | end; | 
| 5860 | 407 | |
| 408 | ||
| 58421 | 409 | (* read_num: hex/bin/decimal *) | 
| 9326 | 410 | |
| 15991 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 411 | local | 
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 412 | |
| 64275 
ac2abc987cf9
accomodate Poly/ML repository version, which treats singleton strings as boxed;
 wenzelm parents: 
62819diff
changeset | 413 | val ten = Char.ord #"0" + 10; | 
| 
ac2abc987cf9
accomodate Poly/ML repository version, which treats singleton strings as boxed;
 wenzelm parents: 
62819diff
changeset | 414 | val a = Char.ord #"a"; | 
| 
ac2abc987cf9
accomodate Poly/ML repository version, which treats singleton strings as boxed;
 wenzelm parents: 
62819diff
changeset | 415 | val A = Char.ord #"A"; | 
| 35428 | 416 | val _ = a > A orelse raise Fail "Bad ASCII"; | 
| 20096 | 417 | |
| 418 | fun remap_hex c = | |
| 419 | let val x = ord c in | |
| 420 | if x >= a then chr (x - a + ten) | |
| 421 | else if x >= A then chr (x - A + ten) | |
| 422 | else c | |
| 423 | end; | |
| 15991 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 424 | |
| 21781 | 425 | fun leading_zeros ["0"] = 0 | 
| 426 |   | leading_zeros ("0" :: cs) = 1 + leading_zeros cs
 | |
| 427 | | leading_zeros _ = 0; | |
| 428 | ||
| 15991 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 429 | in | 
| 15965 
f422f8283491
Use of IntInf.int instead of int in most numeric simprocs; avoids
 paulson parents: 
15570diff
changeset | 430 | |
| 58421 | 431 | fun read_num str = | 
| 9326 | 432 | let | 
| 58421 | 433 | val (radix, digs) = | 
| 434 | (case Symbol.explode str of | |
| 435 | "0" :: "x" :: cs => (16, map remap_hex cs) | |
| 436 | | "0" :: "b" :: cs => (2, cs) | |
| 437 | | cs => (10, cs)); | |
| 29156 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 wenzelm parents: 
28904diff
changeset | 438 | in | 
| 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 wenzelm parents: 
28904diff
changeset | 439 |    {radix = radix,
 | 
| 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 wenzelm parents: 
28904diff
changeset | 440 | leading_zeros = leading_zeros digs, | 
| 58421 | 441 | value = #1 (Library.read_radix_int radix digs)} | 
| 29156 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 wenzelm parents: 
28904diff
changeset | 442 | end; | 
| 9326 | 443 | |
| 15991 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 444 | end; | 
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 445 | |
| 28904 | 446 | fun read_float str = | 
| 447 | let | |
| 58421 | 448 | val cs = Symbol.explode str; | 
| 29156 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 wenzelm parents: 
28904diff
changeset | 449 | val (intpart, fracpart) = | 
| 67522 | 450 | (case chop_prefix Symbol.is_digit cs of | 
| 29156 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 wenzelm parents: 
28904diff
changeset | 451 | (intpart, "." :: fracpart) => (intpart, fracpart) | 
| 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 wenzelm parents: 
28904diff
changeset | 452 | | _ => raise Fail "read_float"); | 
| 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 wenzelm parents: 
28904diff
changeset | 453 | in | 
| 58421 | 454 |    {mant = #1 (Library.read_int (intpart @ fracpart)),
 | 
| 29156 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 wenzelm parents: 
28904diff
changeset | 455 | exp = length fracpart} | 
| 42046 
6341c23baf10
added Lexicon.encode_position, Lexicon.decode_position;
 wenzelm parents: 
40525diff
changeset | 456 | end; | 
| 
6341c23baf10
added Lexicon.encode_position, Lexicon.decode_position;
 wenzelm parents: 
40525diff
changeset | 457 | |
| 42290 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
42264diff
changeset | 458 | |
| 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
42264diff
changeset | 459 | (* marked logical entities *) | 
| 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
42264diff
changeset | 460 | |
| 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
42264diff
changeset | 461 | fun marker s = (prefix s, unprefix s); | 
| 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
42264diff
changeset | 462 | |
| 62529 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
61476diff
changeset | 463 | val (mark_class, unmark_class) = marker "\<^class>"; | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
61476diff
changeset | 464 | val (mark_type, unmark_type) = marker "\<^type>"; | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
61476diff
changeset | 465 | val (mark_const, unmark_const) = marker "\<^const>"; | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
61476diff
changeset | 466 | val (mark_fixed, unmark_fixed) = marker "\<^fixed>"; | 
| 42290 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
42264diff
changeset | 467 | |
| 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
42264diff
changeset | 468 | fun unmark {case_class, case_type, case_const, case_fixed, case_default} s =
 | 
| 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
42264diff
changeset | 469 | (case try unmark_class s of | 
| 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
42264diff
changeset | 470 | SOME c => case_class c | 
| 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
42264diff
changeset | 471 | | NONE => | 
| 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
42264diff
changeset | 472 | (case try unmark_type s of | 
| 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
42264diff
changeset | 473 | SOME c => case_type c | 
| 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
42264diff
changeset | 474 | | NONE => | 
| 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
42264diff
changeset | 475 | (case try unmark_const s of | 
| 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
42264diff
changeset | 476 | SOME c => case_const c | 
| 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
42264diff
changeset | 477 | | NONE => | 
| 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
42264diff
changeset | 478 | (case try unmark_fixed s of | 
| 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
42264diff
changeset | 479 | SOME c => case_fixed c | 
| 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
42264diff
changeset | 480 | | NONE => case_default s)))); | 
| 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
42264diff
changeset | 481 | |
| 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
42264diff
changeset | 482 | val is_marked = | 
| 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
42264diff
changeset | 483 |   unmark {case_class = K true, case_type = K true, case_const = K true,
 | 
| 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
42264diff
changeset | 484 | case_fixed = K true, case_default = K false}; | 
| 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
42264diff
changeset | 485 | |
| 42476 | 486 | val dummy_type = Syntax.const (mark_type "dummy"); | 
| 487 | val fun_type = Syntax.const (mark_type "fun"); | |
| 42290 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
42264diff
changeset | 488 | |
| 59196 | 489 | |
| 490 | (* toplevel pretty printing *) | |
| 491 | ||
| 62663 | 492 | val _ = | 
| 62819 
d3ff367a16a0
careful export of type-dependent functions, without losing their special status;
 wenzelm parents: 
62782diff
changeset | 493 | ML_system_pp (fn _ => fn _ => | 
| 62663 | 494 |     Pretty.to_polyml o Pretty.str_list "{" "}" o map quote o Scan.dest_lexicon);
 | 
| 59196 | 495 | |
| 0 | 496 | end; |