author | boehmes |
Wed, 12 May 2010 23:53:59 +0200 | |
changeset 36895 | a96f9793d9c5 |
parent 32738 | 15bb09ca0378 |
permissions | -rw-r--r-- |
5825 | 1 |
(* Title: Pure/Isar/outer_lex.ML |
2 |
Author: Markus Wenzel, TU Muenchen |
|
3 |
||
4 |
Outer lexical syntax for Isabelle/Isar. |
|
5 |
*) |
|
6 |
||
7 |
signature OUTER_LEX = |
|
8 |
sig |
|
9 |
datatype token_kind = |
|
27814 | 10 |
Command | Keyword | Ident | LongIdent | SymIdent | Var | TypeIdent | TypeVar | |
11 |
Nat | String | AltString | Verbatim | Space | Comment | InternalValue | |
|
12 |
Malformed | Error of string | Sync | EOF |
|
13 |
datatype value = |
|
14 |
Text of string | Typ of typ | Term of term | Fact of thm list | |
|
15 |
Attribute of morphism -> attribute |
|
16 |
type token |
|
5825 | 17 |
val str_of_kind: token_kind -> string |
18 |
val position_of: token -> Position.T |
|
27752
ea7d573e565f
removed obsolete range_of (already included in position);
wenzelm
parents:
27747
diff
changeset
|
19 |
val end_position_of: token -> Position.T |
5825 | 20 |
val pos_of: token -> string |
27733
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
wenzelm
parents:
27663
diff
changeset
|
21 |
val eof: token |
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
wenzelm
parents:
27663
diff
changeset
|
22 |
val is_eof: token -> bool |
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
wenzelm
parents:
27663
diff
changeset
|
23 |
val not_eof: token -> bool |
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
wenzelm
parents:
27663
diff
changeset
|
24 |
val not_sync: token -> bool |
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
wenzelm
parents:
27663
diff
changeset
|
25 |
val stopper: token Scan.stopper |
23721 | 26 |
val kind_of: token -> token_kind |
5825 | 27 |
val is_kind: token_kind -> token -> bool |
7026 | 28 |
val keyword_with: (string -> bool) -> token -> bool |
16029 | 29 |
val ident_with: (string -> bool) -> token -> bool |
5825 | 30 |
val is_proper: token -> bool |
9130 | 31 |
val is_semicolon: token -> bool |
17069 | 32 |
val is_comment: token -> bool |
8580 | 33 |
val is_begin_ignore: token -> bool |
34 |
val is_end_ignore: token -> bool |
|
17069 | 35 |
val is_blank: token -> bool |
8651 | 36 |
val is_newline: token -> bool |
27747
d41abb7bc08a
token: maintain of source, which retains original position information;
wenzelm
parents:
27733
diff
changeset
|
37 |
val source_of: token -> string |
30573 | 38 |
val source_position_of: token -> Symbol_Pos.text * Position.T |
27814 | 39 |
val content_of: token -> string |
14991 | 40 |
val unparse: token -> string |
23788
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
wenzelm
parents:
23729
diff
changeset
|
41 |
val text_of: token -> string * string |
27814 | 42 |
val get_value: token -> value option |
43 |
val map_value: (value -> value) -> token -> token |
|
44 |
val mk_text: string -> token |
|
45 |
val mk_typ: typ -> token |
|
46 |
val mk_term: term -> token |
|
47 |
val mk_fact: thm list -> token |
|
48 |
val mk_attribute: (morphism -> attribute) -> token |
|
49 |
val assignable: token -> token |
|
50 |
val assign: value option -> token -> unit |
|
51 |
val closure: token -> token |
|
52 |
val ident_or_symbolic: string -> bool |
|
30573 | 53 |
val !!! : string -> (Symbol_Pos.T list -> 'a) -> Symbol_Pos.T list -> 'a |
27769 | 54 |
val source_proper: (token, 'a) Source.source -> (token, (token, 'a) Source.source) Source.source |
27835
ff8b8513965a
Symbol.source/OuterLex.source: more explicit do_recover argument;
wenzelm
parents:
27814
diff
changeset
|
55 |
val source': {do_recover: bool Option.option} -> (unit -> Scan.lexicon * Scan.lexicon) -> |
30573 | 56 |
(Symbol_Pos.T, 'a) Source.source -> (token, (Symbol_Pos.T, 'a) Source.source) Source.source |
27835
ff8b8513965a
Symbol.source/OuterLex.source: more explicit do_recover argument;
wenzelm
parents:
27814
diff
changeset
|
57 |
val source: {do_recover: bool Option.option} -> (unit -> Scan.lexicon * Scan.lexicon) -> |
27769 | 58 |
Position.T -> (Symbol.symbol, 'a) Source.source -> (token, |
30573 | 59 |
(Symbol_Pos.T, Position.T * (Symbol.symbol, 'a) Source.source) Source.source) Source.source |
30586
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
60 |
val read_antiq: Scan.lexicon -> (token list -> 'a * token list) -> |
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
61 |
Symbol_Pos.T list * Position.T -> 'a |
5825 | 62 |
end; |
63 |
||
64 |
structure OuterLex: OUTER_LEX = |
|
65 |
struct |
|
66 |
||
67 |
(** tokens **) |
|
68 |
||
27814 | 69 |
(* token values *) |
70 |
||
71 |
(*The value slot assigns an (optional) internal value to a token, |
|
72 |
usually as a side-effect of special scanner setup (see also |
|
73 |
args.ML). Note that an assignable ref designates an intermediate |
|
74 |
state of internalization -- it is NOT meant to persist.*) |
|
75 |
||
76 |
datatype value = |
|
77 |
Text of string | |
|
78 |
Typ of typ | |
|
79 |
Term of term | |
|
80 |
Fact of thm list | |
|
81 |
Attribute of morphism -> attribute; |
|
82 |
||
83 |
datatype slot = |
|
84 |
Slot | |
|
85 |
Value of value option | |
|
32738 | 86 |
Assignable of value option Unsynchronized.ref; |
27814 | 87 |
|
88 |
||
5825 | 89 |
(* datatype token *) |
90 |
||
91 |
datatype token_kind = |
|
27814 | 92 |
Command | Keyword | Ident | LongIdent | SymIdent | Var | TypeIdent | TypeVar | |
93 |
Nat | String | AltString | Verbatim | Space | Comment | InternalValue | |
|
94 |
Malformed | Error of string | Sync | EOF; |
|
5825 | 95 |
|
30573 | 96 |
datatype token = Token of (Symbol_Pos.text * Position.range) * (token_kind * string) * slot; |
5825 | 97 |
|
98 |
val str_of_kind = |
|
7026 | 99 |
fn Command => "command" |
100 |
| Keyword => "keyword" |
|
5825 | 101 |
| Ident => "identifier" |
102 |
| LongIdent => "long identifier" |
|
103 |
| SymIdent => "symbolic identifier" |
|
104 |
| Var => "schematic variable" |
|
105 |
| TypeIdent => "type variable" |
|
106 |
| TypeVar => "schematic type variable" |
|
107 |
| Nat => "number" |
|
108 |
| String => "string" |
|
17164
a786e1a1ce02
added AltString token (delimited by ASCII back-quotes);
wenzelm
parents:
17069
diff
changeset
|
109 |
| AltString => "back-quoted string" |
5825 | 110 |
| Verbatim => "verbatim text" |
7682
46de8064c93c
added Space, Comment token kinds (keep actual text);
wenzelm
parents:
7477
diff
changeset
|
111 |
| Space => "white space" |
46de8064c93c
added Space, Comment token kinds (keep actual text);
wenzelm
parents:
7477
diff
changeset
|
112 |
| Comment => "comment text" |
27814 | 113 |
| InternalValue => "internal value" |
23729
d1ba656978c5
separated Malformed (symbolic char) from Error (bad input);
wenzelm
parents:
23721
diff
changeset
|
114 |
| Malformed => "malformed symbolic character" |
d1ba656978c5
separated Malformed (symbolic char) from Error (bad input);
wenzelm
parents:
23721
diff
changeset
|
115 |
| Error _ => "bad input" |
23788
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
wenzelm
parents:
23729
diff
changeset
|
116 |
| Sync => "sync marker" |
5825 | 117 |
| EOF => "end-of-file"; |
118 |
||
119 |
||
27733
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
wenzelm
parents:
27663
diff
changeset
|
120 |
(* position *) |
5825 | 121 |
|
27814 | 122 |
fun position_of (Token ((_, (pos, _)), _, _)) = pos; |
123 |
fun end_position_of (Token ((_, (_, pos)), _, _)) = pos; |
|
27663 | 124 |
|
5825 | 125 |
val pos_of = Position.str_of o position_of; |
126 |
||
127 |
||
27733
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
wenzelm
parents:
27663
diff
changeset
|
128 |
(* control tokens *) |
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
wenzelm
parents:
27663
diff
changeset
|
129 |
|
27814 | 130 |
fun mk_eof pos = Token (("", (pos, Position.none)), (EOF, ""), Slot); |
27733
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
wenzelm
parents:
27663
diff
changeset
|
131 |
val eof = mk_eof Position.none; |
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
wenzelm
parents:
27663
diff
changeset
|
132 |
|
27814 | 133 |
fun is_eof (Token (_, (EOF, _), _)) = true |
27733
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
wenzelm
parents:
27663
diff
changeset
|
134 |
| is_eof _ = false; |
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
wenzelm
parents:
27663
diff
changeset
|
135 |
|
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
wenzelm
parents:
27663
diff
changeset
|
136 |
val not_eof = not o is_eof; |
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
wenzelm
parents:
27663
diff
changeset
|
137 |
|
27814 | 138 |
fun not_sync (Token (_, (Sync, _), _)) = false |
27733
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
wenzelm
parents:
27663
diff
changeset
|
139 |
| not_sync _ = true; |
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
wenzelm
parents:
27663
diff
changeset
|
140 |
|
27752
ea7d573e565f
removed obsolete range_of (already included in position);
wenzelm
parents:
27747
diff
changeset
|
141 |
val stopper = |
ea7d573e565f
removed obsolete range_of (already included in position);
wenzelm
parents:
27747
diff
changeset
|
142 |
Scan.stopper (fn [] => eof | toks => mk_eof (end_position_of (List.last toks))) is_eof; |
27733
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
wenzelm
parents:
27663
diff
changeset
|
143 |
|
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
wenzelm
parents:
27663
diff
changeset
|
144 |
|
5825 | 145 |
(* kind of token *) |
146 |
||
27814 | 147 |
fun kind_of (Token (_, (k, _), _)) = k; |
148 |
fun is_kind k (Token (_, (k', _), _)) = k = k'; |
|
5825 | 149 |
|
27814 | 150 |
fun keyword_with pred (Token (_, (Keyword, x), _)) = pred x |
7026 | 151 |
| keyword_with _ _ = false; |
5825 | 152 |
|
27814 | 153 |
fun ident_with pred (Token (_, (Ident, x), _)) = pred x |
16029 | 154 |
| ident_with _ _ = false; |
155 |
||
27814 | 156 |
fun is_proper (Token (_, (Space, _), _)) = false |
157 |
| is_proper (Token (_, (Comment, _), _)) = false |
|
5825 | 158 |
| is_proper _ = true; |
159 |
||
27814 | 160 |
fun is_semicolon (Token (_, (Keyword, ";"), _)) = true |
9130 | 161 |
| is_semicolon _ = false; |
162 |
||
27814 | 163 |
fun is_comment (Token (_, (Comment, _), _)) = true |
17069 | 164 |
| is_comment _ = false; |
165 |
||
27814 | 166 |
fun is_begin_ignore (Token (_, (Comment, "<"), _)) = true |
8580 | 167 |
| is_begin_ignore _ = false; |
168 |
||
27814 | 169 |
fun is_end_ignore (Token (_, (Comment, ">"), _)) = true |
8580 | 170 |
| is_end_ignore _ = false; |
171 |
||
8651 | 172 |
|
17069 | 173 |
(* blanks and newlines -- space tokens obey lines *) |
8651 | 174 |
|
27814 | 175 |
fun is_blank (Token (_, (Space, x), _)) = not (String.isSuffix "\n" x) |
17069 | 176 |
| is_blank _ = false; |
177 |
||
27814 | 178 |
fun is_newline (Token (_, (Space, x), _)) = String.isSuffix "\n" x |
8651 | 179 |
| is_newline _ = false; |
180 |
||
5825 | 181 |
|
14991 | 182 |
(* token content *) |
9155 | 183 |
|
27814 | 184 |
fun source_of (Token ((source, (pos, _)), _, _)) = |
185 |
YXML.string_of (XML.Elem (Markup.tokenN, Position.properties_of pos, [XML.Text source])); |
|
25642
ebdff0dca2a5
text_of: made even more robust against recurrent errors;
wenzelm
parents:
25582
diff
changeset
|
186 |
|
27885 | 187 |
fun source_position_of (Token ((source, (pos, _)), _, _)) = (source, pos); |
27873 | 188 |
|
27814 | 189 |
fun content_of (Token (_, (_, x), _)) = x; |
27747
d41abb7bc08a
token: maintain of source, which retains original position information;
wenzelm
parents:
27733
diff
changeset
|
190 |
|
d41abb7bc08a
token: maintain of source, which retains original position information;
wenzelm
parents:
27733
diff
changeset
|
191 |
|
d41abb7bc08a
token: maintain of source, which retains original position information;
wenzelm
parents:
27733
diff
changeset
|
192 |
(* unparse *) |
d41abb7bc08a
token: maintain of source, which retains original position information;
wenzelm
parents:
27733
diff
changeset
|
193 |
|
18547 | 194 |
fun escape q = |
195 |
implode o map (fn s => if s = q orelse s = "\\" then "\\" ^ s else s) o Symbol.explode; |
|
196 |
||
27814 | 197 |
fun unparse (Token (_, (kind, x), _)) = |
14991 | 198 |
(case kind of |
18547 | 199 |
String => x |> quote o escape "\"" |
200 |
| AltString => x |> enclose "`" "`" o escape "`" |
|
14991 | 201 |
| Verbatim => x |> enclose "{*" "*}" |
202 |
| Comment => x |> enclose "(*" "*)" |
|
28034
33050286e65d
text_of Malformed: explode with spaces -- Output.output/escape ineffective by default;
wenzelm
parents:
27885
diff
changeset
|
203 |
| Malformed => space_implode " " (explode x) |
23729
d1ba656978c5
separated Malformed (symbolic char) from Error (bad input);
wenzelm
parents:
23721
diff
changeset
|
204 |
| Sync => "" |
d1ba656978c5
separated Malformed (symbolic char) from Error (bad input);
wenzelm
parents:
23721
diff
changeset
|
205 |
| EOF => "" |
14991 | 206 |
| _ => x); |
207 |
||
23788
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
wenzelm
parents:
23729
diff
changeset
|
208 |
fun text_of tok = |
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
wenzelm
parents:
23729
diff
changeset
|
209 |
if is_semicolon tok then ("terminator", "") |
23729
d1ba656978c5
separated Malformed (symbolic char) from Error (bad input);
wenzelm
parents:
23721
diff
changeset
|
210 |
else |
23788
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
wenzelm
parents:
23729
diff
changeset
|
211 |
let |
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
wenzelm
parents:
23729
diff
changeset
|
212 |
val k = str_of_kind (kind_of tok); |
25642
ebdff0dca2a5
text_of: made even more robust against recurrent errors;
wenzelm
parents:
25582
diff
changeset
|
213 |
val s = unparse tok |
27814 | 214 |
handle ERROR _ => Symbol.separate_chars (content_of tok); |
23788
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
wenzelm
parents:
23729
diff
changeset
|
215 |
in |
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
wenzelm
parents:
23729
diff
changeset
|
216 |
if s = "" then (k, "") |
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
wenzelm
parents:
23729
diff
changeset
|
217 |
else if size s < 40 andalso not (exists_string (fn c => c = "\n") s) then (k ^ " " ^ s, "") |
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
wenzelm
parents:
23729
diff
changeset
|
218 |
else (k, s) |
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
wenzelm
parents:
23729
diff
changeset
|
219 |
end; |
23729
d1ba656978c5
separated Malformed (symbolic char) from Error (bad input);
wenzelm
parents:
23721
diff
changeset
|
220 |
|
5825 | 221 |
|
222 |
||
27814 | 223 |
(** associated values **) |
224 |
||
225 |
(* access values *) |
|
226 |
||
227 |
fun get_value (Token (_, _, Value v)) = v |
|
228 |
| get_value _ = NONE; |
|
229 |
||
230 |
fun map_value f (Token (x, y, Value (SOME v))) = Token (x, y, Value (SOME (f v))) |
|
231 |
| map_value _ tok = tok; |
|
232 |
||
233 |
||
234 |
(* make values *) |
|
235 |
||
236 |
fun mk_value k v = Token ((k, Position.no_range), (InternalValue, k), Value (SOME v)); |
|
237 |
||
238 |
val mk_text = mk_value "<text>" o Text; |
|
239 |
val mk_typ = mk_value "<typ>" o Typ; |
|
240 |
val mk_term = mk_value "<term>" o Term; |
|
241 |
val mk_fact = mk_value "<fact>" o Fact; |
|
242 |
val mk_attribute = mk_value "<attribute>" o Attribute; |
|
243 |
||
244 |
||
245 |
(* static binding *) |
|
246 |
||
247 |
(*1st stage: make empty slots assignable*) |
|
32738 | 248 |
fun assignable (Token (x, y, Slot)) = Token (x, y, Assignable (Unsynchronized.ref NONE)) |
27814 | 249 |
| assignable tok = tok; |
250 |
||
251 |
(*2nd stage: assign values as side-effect of scanning*) |
|
252 |
fun assign v (Token (_, _, Assignable r)) = r := v |
|
253 |
| assign _ _ = (); |
|
254 |
||
255 |
(*3rd stage: static closure of final values*) |
|
32738 | 256 |
fun closure (Token (x, y, Assignable (Unsynchronized.ref v))) = Token (x, y, Value v) |
27814 | 257 |
| closure tok = tok; |
258 |
||
259 |
||
260 |
||
5825 | 261 |
(** scanners **) |
262 |
||
30573 | 263 |
open Basic_Symbol_Pos; |
5825 | 264 |
|
30573 | 265 |
fun !!! msg = Symbol_Pos.!!! ("Outer lexical error: " ^ msg); |
5825 | 266 |
|
267 |
||
268 |
(* scan symbolic idents *) |
|
269 |
||
20664 | 270 |
val is_sym_char = member (op =) (explode "!#$%&*+-/<=>?@^_|~"); |
5825 | 271 |
|
8231 | 272 |
val scan_symid = |
27769 | 273 |
Scan.many1 (is_sym_char o symbol) || |
274 |
Scan.one (Symbol.is_symbolic o symbol) >> single; |
|
5825 | 275 |
|
8231 | 276 |
fun is_symid str = |
277 |
(case try Symbol.explode str of |
|
15531 | 278 |
SOME [s] => Symbol.is_symbolic s orelse is_sym_char s |
279 |
| SOME ss => forall is_sym_char ss |
|
8231 | 280 |
| _ => false); |
281 |
||
27814 | 282 |
fun ident_or_symbolic "begin" = false |
283 |
| ident_or_symbolic ":" = true |
|
284 |
| ident_or_symbolic "::" = true |
|
285 |
| ident_or_symbolic s = Syntax.is_identifier s orelse is_symid s; |
|
5825 | 286 |
|
287 |
||
288 |
(* scan verbatim text *) |
|
289 |
||
290 |
val scan_verb = |
|
27769 | 291 |
$$$ "*" --| Scan.ahead (~$$$ "}") || |
292 |
Scan.one (fn (s, _) => s <> "*" andalso Symbol.is_regular s) >> single; |
|
5825 | 293 |
|
294 |
val scan_verbatim = |
|
30573 | 295 |
(Symbol_Pos.scan_pos --| $$$ "{" --| $$$ "*") -- !!! "missing end of verbatim text" |
30586
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
296 |
(Symbol_Pos.change_prompt |
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
297 |
((Scan.repeat scan_verb >> flat) -- ($$$ "*" |-- $$$ "}" |-- Symbol_Pos.scan_pos))); |
5825 | 298 |
|
299 |
||
300 |
(* scan space *) |
|
301 |
||
19305 | 302 |
fun is_space s = Symbol.is_blank s andalso s <> "\n"; |
5825 | 303 |
|
304 |
val scan_space = |
|
27769 | 305 |
Scan.many1 (is_space o symbol) @@@ Scan.optional ($$$ "\n") [] || |
306 |
Scan.many (is_space o symbol) @@@ $$$ "\n"; |
|
5825 | 307 |
|
308 |
||
27780
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
wenzelm
parents:
27769
diff
changeset
|
309 |
(* scan comment *) |
5825 | 310 |
|
311 |
val scan_comment = |
|
30573 | 312 |
Symbol_Pos.scan_pos -- (Symbol_Pos.scan_comment_body !!! -- Symbol_Pos.scan_pos); |
5825 | 313 |
|
314 |
||
23678 | 315 |
(* scan malformed symbols *) |
316 |
||
317 |
val scan_malformed = |
|
27769 | 318 |
$$$ Symbol.malformed |-- |
30586
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
319 |
Symbol_Pos.change_prompt (Scan.many (Symbol.is_regular o symbol)) |
27769 | 320 |
--| Scan.option ($$$ Symbol.end_malformed); |
27752
ea7d573e565f
removed obsolete range_of (already included in position);
wenzelm
parents:
27747
diff
changeset
|
321 |
|
ea7d573e565f
removed obsolete range_of (already included in position);
wenzelm
parents:
27747
diff
changeset
|
322 |
|
27663 | 323 |
|
27769 | 324 |
(** token sources **) |
5825 | 325 |
|
27769 | 326 |
fun source_proper src = src |> Source.filter is_proper; |
5825 | 327 |
|
23678 | 328 |
local |
329 |
||
27769 | 330 |
fun token_leq ((_, syms1), (_, syms2)) = length syms1 <= length syms2; |
27780
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
wenzelm
parents:
27769
diff
changeset
|
331 |
|
27799 | 332 |
fun token k ss = |
30573 | 333 |
Token ((Symbol_Pos.implode ss, Symbol_Pos.range ss), (k, Symbol_Pos.untabify_content ss), Slot); |
27799 | 334 |
|
335 |
fun token_range k (pos1, (ss, pos2)) = |
|
30573 | 336 |
Token (Symbol_Pos.implode_range pos1 pos2 ss, (k, Symbol_Pos.untabify_content ss), Slot); |
23678 | 337 |
|
27769 | 338 |
fun scan (lex1, lex2) = !!! "bad input" |
30586
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
339 |
(Symbol_Pos.scan_string >> token_range String || |
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
340 |
Symbol_Pos.scan_alt_string >> token_range AltString || |
27799 | 341 |
scan_verbatim >> token_range Verbatim || |
342 |
scan_comment >> token_range Comment || |
|
27780
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
wenzelm
parents:
27769
diff
changeset
|
343 |
scan_space >> token Space || |
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
wenzelm
parents:
27769
diff
changeset
|
344 |
scan_malformed >> token Malformed || |
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
wenzelm
parents:
27769
diff
changeset
|
345 |
Scan.one (Symbol.is_sync o symbol) >> (token Sync o single) || |
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
wenzelm
parents:
27769
diff
changeset
|
346 |
(Scan.max token_leq |
27769 | 347 |
(Scan.max token_leq |
348 |
(Scan.literal lex2 >> pair Command) |
|
349 |
(Scan.literal lex1 >> pair Keyword)) |
|
350 |
(Syntax.scan_longid >> pair LongIdent || |
|
351 |
Syntax.scan_id >> pair Ident || |
|
352 |
Syntax.scan_var >> pair Var || |
|
353 |
Syntax.scan_tid >> pair TypeIdent || |
|
354 |
Syntax.scan_tvar >> pair TypeVar || |
|
355 |
Syntax.scan_nat >> pair Nat || |
|
27780
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
wenzelm
parents:
27769
diff
changeset
|
356 |
scan_symid >> pair SymIdent) >> uncurry token)); |
27769 | 357 |
|
358 |
fun recover msg = |
|
359 |
Scan.many ((Symbol.is_regular andf (not o Symbol.is_blank)) o symbol) |
|
27780
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
wenzelm
parents:
27769
diff
changeset
|
360 |
>> (single o token (Error msg)); |
23678 | 361 |
|
362 |
in |
|
5825 | 363 |
|
27835
ff8b8513965a
Symbol.source/OuterLex.source: more explicit do_recover argument;
wenzelm
parents:
27814
diff
changeset
|
364 |
fun source' {do_recover} get_lex = |
30573 | 365 |
Source.source Symbol_Pos.stopper (Scan.bulk (fn xs => scan (get_lex ()) xs)) |
27780
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
wenzelm
parents:
27769
diff
changeset
|
366 |
(Option.map (rpair recover) do_recover); |
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
wenzelm
parents:
27769
diff
changeset
|
367 |
|
5825 | 368 |
fun source do_recover get_lex pos src = |
30573 | 369 |
Symbol_Pos.source pos src |
27780
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
wenzelm
parents:
27769
diff
changeset
|
370 |
|> source' do_recover get_lex; |
23678 | 371 |
|
372 |
end; |
|
5825 | 373 |
|
30586
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
374 |
|
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
375 |
(* read_antiq *) |
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
376 |
|
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
377 |
fun read_antiq lex scan (syms, pos) = |
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
378 |
let |
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
379 |
fun err msg = cat_error msg ("Malformed antiquotation" ^ Position.str_of pos ^ ":\n" ^ |
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
380 |
"@{" ^ Symbol_Pos.content syms ^ "}"); |
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
381 |
|
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
382 |
val res = |
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
383 |
Source.of_list syms |
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
384 |
|> source' {do_recover = NONE} (K (lex, Scan.empty_lexicon)) |
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
385 |
|> source_proper |
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
386 |
|> Source.source stopper (Scan.error (Scan.bulk scan)) NONE |
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
387 |
|> Source.exhaust; |
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
388 |
in (case res of [x] => x | _ => err "") handle ERROR msg => err msg end; |
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
389 |
|
5825 | 390 |
end; |