author | wenzelm |
Thu, 07 Aug 2008 19:21:42 +0200 | |
changeset 27780 | 7d0910f662f7 |
parent 27769 | ad50c38ef842 |
child 27799 | 52f07d5292cd |
permissions | -rw-r--r-- |
5825 | 1 |
(* Title: Pure/Isar/outer_lex.ML |
2 |
ID: $Id$ |
|
3 |
Author: Markus Wenzel, TU Muenchen |
|
4 |
||
5 |
Outer lexical syntax for Isabelle/Isar. |
|
6 |
*) |
|
7 |
||
8 |
(*Interface of the outer lexer: token representation, classification
  predicates, access to token content, and token sources.*)
signature OUTER_LEX =
sig
  (*kinds of outer tokens; Error carries a message*)
  datatype token_kind =
    Command | Keyword | Ident | LongIdent | SymIdent | Var | TypeIdent | TypeVar | Nat |
    String | AltString | Verbatim | Space | Comment | Malformed | Error of string | Sync | EOF
  eqtype token
  (*printable description of a token kind*)
  val str_of_kind: token_kind -> string
  (*positions*)
  val position_of: token -> Position.T
  val end_position_of: token -> Position.T
  val pos_of: token -> string
  (*control tokens*)
  val eof: token
  val is_eof: token -> bool
  val not_eof: token -> bool
  val not_sync: token -> bool
  val stopper: token Scan.stopper
  (*classification*)
  val kind_of: token -> token_kind
  val is_kind: token_kind -> token -> bool
  val keyword_with: (string -> bool) -> token -> bool
  val ident_with: (string -> bool) -> token -> bool
  val is_proper: token -> bool
  val is_semicolon: token -> bool
  val is_comment: token -> bool
  val is_begin_ignore: token -> bool
  val is_end_ignore: token -> bool
  val is_blank: token -> bool
  val is_newline: token -> bool
  (*token content*)
  val val_of: token -> string
  val source_of: token -> string
  val unparse: token -> string
  val text_of: token -> string * string
  val is_sid: string -> bool
  (*scanners*)
  val !!! : string -> (SymbolPos.T list -> 'a) -> SymbolPos.T list -> 'a
  val scan_quoted: SymbolPos.T list -> SymbolPos.T list * SymbolPos.T list
  (*token sources*)
  val source_proper: (token, 'a) Source.source -> (token, (token, 'a) Source.source) Source.source
  val source': bool Option.option -> (unit -> Scan.lexicon * Scan.lexicon) ->
    (SymbolPos.T, 'a) Source.source -> (token, (SymbolPos.T, 'a) Source.source) Source.source
  val source: bool Option.option -> (unit -> Scan.lexicon * Scan.lexicon) ->
    Position.T -> (Symbol.symbol, 'a) Source.source -> (token,
      (SymbolPos.T, Position.T * (Symbol.symbol, 'a) Source.source) Source.source) Source.source
end;
48 |
||
49 |
structure OuterLex: OUTER_LEX = |
|
50 |
struct |
|
51 |
||
52 |
(** tokens **) |
|
53 |
||
54 |
(* datatype token *) |
|
55 |
||
56 |
(*Kinds of outer tokens.  Error is the only constructor with an argument:
  it carries a message string.*)
datatype token_kind =
  Command | Keyword | Ident | LongIdent | SymIdent | Var | TypeIdent | TypeVar | Nat |
  String | AltString | Verbatim | Space | Comment | Malformed | Error of string | Sync | EOF;
5825 | 59 |
|
27780
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
wenzelm
parents:
27769
diff
changeset
|
60 |
(*A token pairs its source text and position range with its kind and
  string value.*)
datatype token = Token of (SymbolPos.text * Position.range) * (token_kind * string);
5825 | 61 |
|
62 |
(*Human-readable description of a token kind, as used in messages.*)
fun str_of_kind kind =
  (case kind of
    Command => "command"
  | Keyword => "keyword"
  | Ident => "identifier"
  | LongIdent => "long identifier"
  | SymIdent => "symbolic identifier"
  | Var => "schematic variable"
  | TypeIdent => "type variable"
  | TypeVar => "schematic type variable"
  | Nat => "number"
  | String => "string"
  | AltString => "back-quoted string"
  | Verbatim => "verbatim text"
  | Space => "white space"
  | Comment => "comment text"
  | Malformed => "malformed symbolic character"
  | Error _ => "bad input"
  | Sync => "sync marker"
  | EOF => "end-of-file");
81 |
||
82 |
||
27733
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
wenzelm
parents:
27663
diff
changeset
|
83 |
(* position *) |
5825 | 84 |
|
27769 | 85 |
(*First component of the token's position range.*)
fun position_of tok =
  let val Token ((_, (start, _)), _) = tok
  in start end;

(*Second component of the token's position range.*)
fun end_position_of tok =
  let val Token ((_, (_, stop)), _) = tok
  in stop end;
|
27663 | 87 |
|
5825 | 88 |
(*Printed form of the token's start position.*)
fun pos_of tok = Position.str_of (position_of tok);
89 |
||
90 |
||
27733
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
wenzelm
parents:
27663
diff
changeset
|
91 |
(* control tokens *) |
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
wenzelm
parents:
27663
diff
changeset
|
92 |
|
27769 | 93 |
(*End-of-file token starting at the given position; its source text and
  value are empty and its end position is Position.none.*)
fun mk_eof pos =
  let
    val range = (pos, Position.none);
    val content = (EOF, "");
  in Token (("", range), content) end;

(*End-of-file token without any position.*)
val eof = mk_eof Position.none;
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
wenzelm
parents:
27663
diff
changeset
|
95 |
|
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
wenzelm
parents:
27663
diff
changeset
|
96 |
(*Test for an end-of-file token.*)
fun is_eof tok =
  (case tok of
    Token (_, (EOF, _)) => true
  | _ => false);
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
wenzelm
parents:
27663
diff
changeset
|
98 |
|
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
wenzelm
parents:
27663
diff
changeset
|
99 |
(*Negation of is_eof.*)
fun not_eof tok = not (is_eof tok);
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
wenzelm
parents:
27663
diff
changeset
|
100 |
|
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
wenzelm
parents:
27663
diff
changeset
|
101 |
(*True for every token except a sync marker.*)
fun not_sync tok =
  (case tok of
    Token (_, (Sync, _)) => false
  | _ => true);
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
wenzelm
parents:
27663
diff
changeset
|
103 |
|
27752
ea7d573e565f
removed obsolete range_of (already included in position);
wenzelm
parents:
27747
diff
changeset
|
104 |
(*Stopper for token scanners: the stop token is plain eof for an empty
  list, otherwise an end-of-file token placed at the end position of the
  last token; is_eof recognizes the stop token.*)
val stopper =
  let
    fun stop_token [] = eof
      | stop_token toks = mk_eof (end_position_of (List.last toks));
  in Scan.stopper stop_token is_eof end;
27733
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
wenzelm
parents:
27663
diff
changeset
|
106 |
|
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
wenzelm
parents:
27663
diff
changeset
|
107 |
|
5825 | 108 |
(* kind of token *) |
109 |
||
23721 | 110 |
(*Kind component of a token.*)
fun kind_of tok =
  let val Token (_, (kind, _)) = tok
  in kind end;
5825 | 111 |
(*Test whether a token has exactly the given kind.*)
fun is_kind k tok =
  (case tok of
    Token (_, (k', _)) => k = k');
112 |
||
7026 | 113 |
(*Keyword token whose value satisfies the predicate; false for any other
  kind of token.*)
fun keyword_with pred tok =
  (case tok of
    Token (_, (Keyword, x)) => pred x
  | _ => false);
|
5825 | 115 |
|
16029 | 116 |
(*Ident token whose value satisfies the predicate; false for any other
  kind of token.*)
fun ident_with pred tok =
  (case tok of
    Token (_, (Ident, x)) => pred x
  | _ => false);
|
118 |
||
7682
46de8064c93c
added Space, Comment token kinds (keep actual text);
wenzelm
parents:
7477
diff
changeset
|
119 |
(*Proper tokens are all tokens other than white space and comments.*)
fun is_proper tok =
  (case tok of
    Token (_, (Space, _)) => false
  | Token (_, (Comment, _)) => false
  | _ => true);
122 |
||
9195 | 123 |
(*Test for the keyword token consisting of a single semicolon.*)
fun is_semicolon tok =
  (case tok of
    Token (_, (Keyword, ";")) => true
  | _ => false);
125 |
||
17069 | 126 |
(*Test for a comment token.*)
fun is_comment tok =
  (case tok of
    Token (_, (Comment, _)) => true
  | _ => false);
|
128 |
||
8580 | 129 |
(*Comment token whose content is exactly "<".*)
fun is_begin_ignore tok =
  (case tok of
    Token (_, (Comment, "<")) => true
  | _ => false);

(*Comment token whose content is exactly ">".*)
fun is_end_ignore tok =
  (case tok of
    Token (_, (Comment, ">")) => true
  | _ => false);
|
134 |
||
8651 | 135 |
|
17069 | 136 |
(* blanks and newlines -- space tokens obey lines *) |
8651 | 137 |
|
23678 | 138 |
(*Space token whose text does not end with a newline.*)
fun is_blank tok =
  (case tok of
    Token (_, (Space, x)) => not (String.isSuffix "\n" x)
  | _ => false);

(*Space token whose text ends with a newline.*)
fun is_newline tok =
  (case tok of
    Token (_, (Space, x)) => String.isSuffix "\n" x
  | _ => false);
143 |
||
5825 | 144 |
|
14991 | 145 |
(* token content *) |
9155 | 146 |
|
25642
ebdff0dca2a5
text_of: made even more robust against recurrent errors;
wenzelm
parents:
25582
diff
changeset
|
147 |
(*String value component of a token.*)
fun val_of tok =
  let val Token (_, (_, value)) = tok
  in value end;
ebdff0dca2a5
text_of: made even more robust against recurrent errors;
wenzelm
parents:
25582
diff
changeset
|
148 |
|
27780
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
wenzelm
parents:
27769
diff
changeset
|
149 |
(*YXML string of a token element that wraps the token's source text and
  carries the properties of its start position.*)
fun source_of (Token ((text, (start, _)), _)) =
  let
    val props = Position.properties_of start;
    val tree = XML.Elem (Markup.tokenN, props, [XML.Text text]);
  in YXML.string_of tree end;
27747
d41abb7bc08a
token: maintain of source, which retains original position information;
wenzelm
parents:
27733
diff
changeset
|
151 |
|
d41abb7bc08a
token: maintain of source, which retains original position information;
wenzelm
parents:
27733
diff
changeset
|
152 |
|
d41abb7bc08a
token: maintain of source, which retains original position information;
wenzelm
parents:
27733
diff
changeset
|
153 |
(* unparse *) |
d41abb7bc08a
token: maintain of source, which retains original position information;
wenzelm
parents:
27733
diff
changeset
|
154 |
|
18547 | 155 |
(*Prefix every occurrence of the quote symbol q and of the backslash with
  a backslash; all other symbols are kept as they are.*)
fun escape q =
  let
    fun esc s = if s = q orelse s = "\\" then "\\" ^ s else s;
  in fn str => implode (map esc (Symbol.explode str)) end;
|
157 |
||
14991 | 158 |
(*Concrete text for a token value: delimited kinds get their delimiters
  back (with escapes for the two string kinds), malformed symbols are
  passed through the output layer, control tokens yield the empty string,
  and every other kind yields the value unchanged.*)
fun unparse (Token (_, (kind, x))) =
  let
    fun render String = quote (escape "\"" x)
      | render AltString = enclose "`" "`" (escape "`" x)
      | render Verbatim = enclose "{*" "*}" x
      | render Comment = enclose "(*" "*)" x
      | render Malformed = Output.escape (translate_string Output.output x)
      | render Sync = ""
      | render EOF = ""
      | render _ = x;
  in render kind end;
168 |
||
23788
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
wenzelm
parents:
23729
diff
changeset
|
169 |
(*Description of a token for messages, as a pair of a short headline and
  a long text.  A semicolon is reported as "terminator".  Otherwise the
  unparsed text is appended to the kind name if it is shorter than 40
  characters and contains no newline; longer or multi-line text goes into
  the second component instead.  If unparse raises ERROR, the token value
  is shown via Symbol.separate_chars.*)
fun text_of tok =
  let
    fun describe () =
      let
        val kind_name = str_of_kind (kind_of tok);
        val shown =
          (unparse tok handle ERROR _ => Symbol.separate_chars (val_of tok));
        val fits_inline =
          size shown < 40 andalso not (exists_string (fn c => c = "\n") shown);
      in
        if shown = "" then (kind_name, "")
        else if fits_inline then (kind_name ^ " " ^ shown, "")
        else (kind_name, shown)
      end;
  in
    if is_semicolon tok then ("terminator", "")
    else describe ()
  end;
23729
d1ba656978c5
separated Malformed (symbolic char) from Error (bad input);
wenzelm
parents:
23721
diff
changeset
|
181 |
|
5825 | 182 |
|
183 |
||
184 |
(** scanners **) |
|
185 |
||
27769 | 186 |
open BasicSymbolPos; |
5825 | 187 |
|
27769 | 188 |
(*Error wrapper for scanners: delegates to SymbolPos.!!! with the message
  prefixed by "Outer lexical error: ".*)
fun !!! msg =
  let val full_msg = "Outer lexical error: " ^ msg
  in SymbolPos.!!! full_msg end;
5825 | 189 |
|
27769 | 190 |
(*Run the given scanner under the prompt "# ".*)
fun change_prompt scan = scan |> Scan.prompt "# ";
5825 | 191 |
|
192 |
||
193 |
(* scan symbolic idents *) |
|
194 |
||
20664 | 195 |
(*Membership test for the ASCII characters that may form symbolic
  identifiers.*)
val is_sym_char =
  let val sym_chars = explode "!#$%&*+-/<=>?@^_|~"
  in fn s => member (op =) sym_chars s end;
5825 | 196 |
|
8231 | 197 |
(*Symbolic identifier: either a non-empty run of ASCII symbolic
  characters, or exactly one symbol accepted by Symbol.is_symbolic.*)
val scan_symid =
  let
    val ascii_run = Scan.many1 (is_sym_char o symbol);
    val one_symbolic = Scan.one (Symbol.is_symbolic o symbol) >> single;
  in ascii_run || one_symbolic end;
|
5825 | 200 |
|
8231 | 201 |
(*String version of the symbolic identifier test: a single symbol may be
  symbolic or an ASCII symbolic character, any other list of symbols must
  consist of ASCII symbolic characters only.  A string that fails to
  explode into symbols is rejected.*)
fun is_symid str =
  let
    fun check [s] = Symbol.is_symbolic s orelse is_sym_char s
      | check ss = forall is_sym_char ss;
  in
    (case try Symbol.explode str of
      NONE => false
    | SOME syms => check syms)
  end;
206 |
||
20982 | 207 |
(*Test for identifier-like strings: "begin" is excluded, ":" and "::" are
  accepted outright, anything else must be a symbolic identifier or pass
  Syntax.is_identifier.*)
fun is_sid s =
  (case s of
    "begin" => false
  | ":" => true
  | "::" => true
  | _ => is_symid s orelse Syntax.is_identifier s);
5825 | 211 |
|
212 |
||
213 |
(* scan strings *) |
|
214 |
||
17164
a786e1a1ce02
added AltString token (delimited by ASCII back-quotes);
wenzelm
parents:
17069
diff
changeset
|
215 |
local |
a786e1a1ce02
added AltString token (delimited by ASCII back-quotes);
wenzelm
parents:
17069
diff
changeset
|
216 |
|
25579
22869d9d545b
(alt)string: allow explicit character codes (as in ML);
wenzelm
parents:
24577
diff
changeset
|
217 |
(*Character code escape: exactly three ASCII digits, read as an integer.
  Codes up to 255 yield the corresponding character, paired with the
  position of the first digit; larger codes make the scanner fail.*)
val char_code =
  Scan.one (Symbol.is_ascii_digit o symbol) --
  Scan.one (Symbol.is_ascii_digit o symbol) --
  Scan.one (Symbol.is_ascii_digit o symbol) :|--
  (fn (((a, pos), (b, _)), (c, _)) =>
    let val (n, _) = Library.read_int [a, b, c]
    in if n <= 255 then Scan.succeed [(chr n, pos)] else Scan.fail end);
25579
22869d9d545b
(alt)string: allow explicit character codes (as in ML);
wenzelm
parents:
24577
diff
changeset
|
224 |
|
17164
a786e1a1ce02
added AltString token (delimited by ASCII back-quotes);
wenzelm
parents:
17069
diff
changeset
|
225 |
(*One element of string content for quote symbol q.  After a backslash
  (which is dropped), the quote itself, another backslash, or a character
  code must follow, otherwise an error is raised.  Without a backslash,
  any regular symbol other than the quote and the backslash is accepted
  as a singleton list.*)
fun scan_str q =
  $$$ "\\" |-- !!! "bad escape character in string" ($$$ q || $$$ "\\" || char_code) ||
  Scan.one (fn (s, _) => s <> q andalso s <> "\\" andalso Symbol.is_regular s) >> single;
|
5825 | 228 |
|
17164
a786e1a1ce02
added AltString token (delimited by ASCII back-quotes);
wenzelm
parents:
17069
diff
changeset
|
229 |
(*String delimited by quote symbol q.  The result is the position taken
  before the opening quote, paired with the flattened content and the
  position taken after the closing quote.  Once the opening quote has
  been read, a missing closing quote is reported as an error; the content
  is scanned under the changed prompt.*)
fun scan_strs q =
  (SymbolPos.scan_pos --| $$$ q) -- !!! "missing quote at end of string"
    (change_prompt ((Scan.repeat (scan_str q) >> flat) -- ($$$ q |-- SymbolPos.scan_pos)));
17164
a786e1a1ce02
added AltString token (delimited by ASCII back-quotes);
wenzelm
parents:
17069
diff
changeset
|
232 |
|
a786e1a1ce02
added AltString token (delimited by ASCII back-quotes);
wenzelm
parents:
17069
diff
changeset
|
233 |
in |
a786e1a1ce02
added AltString token (delimited by ASCII back-quotes);
wenzelm
parents:
17069
diff
changeset
|
234 |
|
a786e1a1ce02
added AltString token (delimited by ASCII back-quotes);
wenzelm
parents:
17069
diff
changeset
|
235 |
(*string delimited by double quotes*)
val scan_string = scan_strs "\"";
(*alternative string delimited by ASCII back-quotes*)
val scan_alt_string = scan_strs "`";
a786e1a1ce02
added AltString token (delimited by ASCII back-quotes);
wenzelm
parents:
17069
diff
changeset
|
237 |
|
27769 | 238 |
(*Either kind of string; only the second component of the Scan.trace
  result is kept -- presumably the symbols consumed by the scanner,
  delimiters included (TODO confirm against Scan.trace).*)
val scan_quoted = Scan.trace (scan_string || scan_alt_string) >> #2;
27752
ea7d573e565f
removed obsolete range_of (already included in position);
wenzelm
parents:
27747
diff
changeset
|
239 |
|
17164
a786e1a1ce02
added AltString token (delimited by ASCII back-quotes);
wenzelm
parents:
17069
diff
changeset
|
240 |
end; |
5825 | 241 |
|
242 |
||
243 |
(* scan verbatim text *) |
|
244 |
||
245 |
(*One element of verbatim text: an asterisk, provided the lookahead
  ~$$$ "}" succeeds (presumably: the next symbol is not a closing brace;
  TODO confirm the meaning of ~$$$), or any regular symbol other than an
  asterisk, as a singleton list.*)
val scan_verb =
  $$$ "*" --| Scan.ahead (~$$$ "}") ||
  Scan.one (fn (s, _) => s <> "*" andalso Symbol.is_regular s) >> single;
|
5825 | 248 |
|
249 |
(*Verbatim text between an opening brace-asterisk and a closing
  asterisk-brace.  The result is the position taken before the opening
  delimiter, paired with the flattened content and the position taken
  after the closing delimiter.  Once the opening delimiter has been read,
  a missing end is reported as an error; the content is scanned under the
  changed prompt.*)
val scan_verbatim =
  (SymbolPos.scan_pos --| $$$ "{" --| $$$ "*") -- !!! "missing end of verbatim text"
    (change_prompt ((Scan.repeat scan_verb >> flat) -- ($$$ "*" |-- $$$ "}" |-- SymbolPos.scan_pos)));
5825 | 252 |
|
253 |
||
254 |
(* scan space *) |
|
255 |
||
19305 | 256 |
(*Blank symbol other than the newline.*)
fun is_space s =
  if Symbol.is_blank s then s <> "\n" else false;
5825 | 257 |
|
258 |
(*White space that obeys lines: either a non-empty run of non-newline
  blanks with an optional newline after it, or a possibly empty run of
  such blanks followed by a mandatory newline.  A space token thus
  contains at most one newline, and only at its end.*)
val scan_space =
  Scan.many1 (is_space o symbol) @@@ Scan.optional ($$$ "\n") [] ||
  Scan.many (is_space o symbol) @@@ $$$ "\n";
|
5825 | 261 |
|
262 |
||
27780
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
wenzelm
parents:
27769
diff
changeset
|
263 |
(* scan comment *) |
5825 | 264 |
|
265 |
(*Comment with positions: a position taken before the comment, paired
  with the result of SymbolPos.scan_comment_body (which is given the local
  error wrapper) and a position taken after it.*)
val scan_comment =
  SymbolPos.scan_pos -- (SymbolPos.scan_comment_body !!! -- SymbolPos.scan_pos);
5825 | 267 |
|
268 |
||
23678 | 269 |
(* scan malformed symbols *) |
270 |
||
271 |
(*Malformed symbol: the Symbol.malformed marker (dropped), then any
  number of regular symbols scanned under the changed prompt, then an
  optional Symbol.end_malformed marker (also dropped).*)
val scan_malformed =
  $$$ Symbol.malformed |--
    change_prompt (Scan.many (Symbol.is_regular o symbol))
  --| Scan.option ($$$ Symbol.end_malformed);
|
27752
ea7d573e565f
removed obsolete range_of (already included in position);
wenzelm
parents:
27747
diff
changeset
|
275 |
|
ea7d573e565f
removed obsolete range_of (already included in position);
wenzelm
parents:
27747
diff
changeset
|
276 |
|
27663 | 277 |
|
27769 | 278 |
(** token sources **) |
5825 | 279 |
|
27769 | 280 |
(*Token source restricted to proper tokens, as defined by is_proper.*)
fun source_proper src = Source.filter is_proper src;
5825 | 281 |
|
23678 | 282 |
local |
283 |
||
27769 | 284 |
(*Order on scan results: compares the lengths of the symbol lists in the
  second components.*)
fun token_leq (a, b) =
  let
    val (_, syms1) = a;
    val (_, syms2) = b;
  in length syms1 <= length syms2 end;
27780
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
wenzelm
parents:
27769
diff
changeset
|
285 |
|
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
wenzelm
parents:
27769
diff
changeset
|
286 |
(*Token of kind k from positioned symbols: the source part comes from
  SymbolPos.implode, the value is the plain concatenation of the symbols.*)
fun token k ss =
  let
    val source = SymbolPos.implode ss;
    val value = implode (map symbol ss);
  in Token (source, (k, value)) end;

(*Token of kind k from delimited content: like token, but the source part
  comes from SymbolPos.implode_delim with the two delimiter positions.*)
fun token_delim k (pos1, (ss, pos2)) =
  let
    val source = SymbolPos.implode_delim pos1 pos2 ss;
    val value = implode (map symbol ss);
  in Token (source, (k, value)) end;
23678 | 289 |
|
27769 | 290 |
(*Scan one token.  The alternatives are tried in order: strings,
  alternative strings, verbatim text, comments, white space, malformed
  symbols, the sync symbol, and finally literals and identifiers.
  Literals from lex2 become Command tokens, literals from lex1 become
  Keyword tokens.  The choices among the two literal scanners and the
  identifier scanners are made by Scan.max with token_leq -- presumably
  the result with the longer symbol list is preferred (TODO confirm
  against Scan.max).  If no alternative applies, "bad input" is reported
  through the error wrapper.*)
fun scan (lex1, lex2) = !!! "bad input"
  (scan_string >> token_delim String ||
    scan_alt_string >> token_delim AltString ||
    scan_verbatim >> token_delim Verbatim ||
    scan_comment >> token_delim Comment ||
    scan_space >> token Space ||
    scan_malformed >> token Malformed ||
    Scan.one (Symbol.is_sync o symbol) >> (token Sync o single) ||
    (Scan.max token_leq
      (Scan.max token_leq
        (Scan.literal lex2 >> pair Command)
        (Scan.literal lex1 >> pair Keyword))
      (Syntax.scan_longid >> pair LongIdent ||
        Syntax.scan_id >> pair Ident ||
        Syntax.scan_var >> pair Var ||
        Syntax.scan_tid >> pair TypeIdent ||
        Syntax.scan_tvar >> pair TypeVar ||
        Syntax.scan_nat >> pair Nat ||
        scan_symid >> pair SymIdent) >> uncurry token));
27769 | 309 |
|
310 |
(*Error recovery: consume a (possibly empty) run of regular non-blank
  symbols and return it as a single Error token carrying msg.*)
fun recover msg =
  Scan.many ((Symbol.is_regular andf (not o Symbol.is_blank)) o symbol)
  >> (single o token (Error msg));
23678 | 313 |
|
314 |
in |
|
5825 | 315 |
|
27780
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
wenzelm
parents:
27769
diff
changeset
|
316 |
(*Token source over positioned symbols.  get_lex is called anew inside
  the scanner function, so the pair of lexicons is fetched each time that
  function is applied.  If do_recover is given, its value is paired with
  the recover function and passed on to Source.source.*)
fun source' do_recover get_lex =
  Source.source SymbolPos.stopper (Scan.bulk (fn xs => scan (get_lex ()) xs))
    (Option.map (rpair recover) do_recover);
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
wenzelm
parents:
27769
diff
changeset
|
319 |
|
5825 | 320 |
(*Token source over plain symbols: the symbols are first turned into
  positioned symbols by SymbolPos.source with the given initial position,
  then tokenized by source'.*)
fun source do_recover get_lex pos src =
  let val positioned = SymbolPos.source pos src
  in source' do_recover get_lex positioned end;
23678 | 323 |
|
324 |
end; |
|
5825 | 325 |
|
326 |
end; |