31 val val_of: token -> string |
31 val val_of: token -> string |
32 val unparse: token -> string |
32 val unparse: token -> string |
33 val text_of: token -> string * string |
33 val text_of: token -> string * string |
34 val is_sid: string -> bool |
34 val is_sid: string -> bool |
35 val !!! : string -> (Position.T * 'a -> 'b) -> Position.T * 'a -> 'b |
35 val !!! : string -> (Position.T * 'a -> 'b) -> Position.T * 'a -> 'b |
36 val incr_line: ('a -> 'b * 'c) -> Position.T * 'a -> 'b * (Position.T * 'c) |
36 val count: (Symbol.symbol list -> Symbol.symbol * Symbol.symbol list) -> |
37 val keep_line: ('a -> 'b * 'c) -> Position.T * 'a -> 'b * (Position.T * 'c) |
37 Position.T * Symbol.symbol list -> Symbol.symbol * (Position.T * Symbol.symbol list) |
38 val scan_blank: Position.T * Symbol.symbol list |
38 val counted: (Symbol.symbol list -> Symbol.symbol list * Symbol.symbol list) -> |
39 -> Symbol.symbol * (Position.T * Symbol.symbol list) |
39 Position.T * Symbol.symbol list -> string * (Position.T * Symbol.symbol list) |
40 val scan_string: Position.T * Symbol.symbol list -> string * (Position.T * Symbol.symbol list) |
40 val scan_string: Position.T * Symbol.symbol list -> string * (Position.T * Symbol.symbol list) |
41 val scan: (Scan.lexicon * Scan.lexicon) -> |
41 val scan: (Scan.lexicon * Scan.lexicon) -> |
42 Position.T * Symbol.symbol list -> token * (Position.T * Symbol.symbol list) |
42 Position.T * Symbol.symbol list -> token * (Position.T * Symbol.symbol list) |
43 val source: bool option -> (unit -> Scan.lexicon * Scan.lexicon) -> |
43 val source: bool option -> (unit -> Scan.lexicon * Scan.lexicon) -> |
44 Position.T -> (Symbol.symbol, 'a) Source.source -> |
44 Position.T -> (Symbol.symbol, 'a) Source.source -> |
184 |
184 |
185 fun lex_err msg ((pos, cs), _) = "Outer lexical error" ^ Position.str_of pos ^ ": " ^ msg cs; |
185 fun lex_err msg ((pos, cs), _) = "Outer lexical error" ^ Position.str_of pos ^ ": " ^ msg cs; |
186 fun !!! msg scan = Scan.!! (lex_err (K msg)) scan; |
186 fun !!! msg scan = Scan.!! (lex_err (K msg)) scan; |
187 |
187 |
188 |
188 |
189 (* line numbering *) |
189 (* position *) |
190 |
190 |
191 fun incr_line scan = Scan.depend (fn pos => scan >> pair (Position.inc pos)); |
191 local |
192 val keep_line = Scan.lift; |
192 |
193 |
193 fun map_position f (scan: Symbol.symbol list -> 'a * Symbol.symbol list) = |
194 val scan_blank = |
194 Scan.depend (fn (pos: Position.T) => scan >> (fn x => (f x pos, x))); |
195 incr_line ($$ "\n") || |
195 |
196 keep_line (Scan.one Symbol.is_blank); |
196 in |
|
197 |
|
198 fun count scan = map_position Position.advance scan; |
|
199 fun counted scan = map_position (fold Position.advance) scan >> implode; |
|
200 |
|
201 end; |
197 |
202 |
198 |
203 |
199 (* scan symbolic idents *) |
204 (* scan symbolic idents *) |
200 |
205 |
201 val is_sym_char = member (op =) (explode "!#$%&*+-/<=>?@^_|~"); |
206 val is_sym_char = member (op =) (explode "!#$%&*+-/<=>?@^_|~"); |
202 |
207 |
203 val scan_symid = |
208 val scan_symid = |
204 Scan.many1 is_sym_char >> implode || |
209 Scan.many1 is_sym_char || |
205 Scan.one Symbol.is_symbolic; |
210 Scan.one Symbol.is_symbolic >> single; |
206 |
211 |
207 fun is_symid str = |
212 fun is_symid str = |
208 (case try Symbol.explode str of |
213 (case try Symbol.explode str of |
209 SOME [s] => Symbol.is_symbolic s orelse is_sym_char s |
214 SOME [s] => Symbol.is_symbolic s orelse is_sym_char s |
210 | SOME ss => forall is_sym_char ss |
215 | SOME ss => forall is_sym_char ss |
219 (* scan strings *) |
224 (* scan strings *) |
220 |
225 |
221 local |
226 local |
222 |
227 |
223 val char_code = |
228 val char_code = |
224 Scan.one Symbol.is_ascii_digit -- |
229 count (Scan.one Symbol.is_ascii_digit) -- |
225 Scan.one Symbol.is_ascii_digit -- |
230 count (Scan.one Symbol.is_ascii_digit) -- |
226 Scan.one Symbol.is_ascii_digit :|-- (fn ((a, b), c) => |
231 count (Scan.one Symbol.is_ascii_digit) :|-- |
|
232 (fn ((a, b), c) => |
227 let val (n, _) = Library.read_int [a, b, c] |
233 let val (n, _) = Library.read_int [a, b, c] |
228 in if n <= 255 then Scan.succeed (chr n) else Scan.fail end); |
234 in if n <= 255 then Scan.succeed (chr n) else Scan.fail end); |
229 |
235 |
230 fun scan_str q = |
236 fun scan_str q = |
231 scan_blank || |
237 count ($$ "\\") |-- !!! "bad escape character in string" (count ($$ q || $$ "\\") || char_code) || |
232 keep_line ($$ "\\") |-- !!! "bad escape character in string" |
238 count (Scan.one (fn s => s <> q andalso s <> "\\" andalso Symbol.is_regular s)); |
233 (scan_blank || keep_line ($$ q || $$ "\\" || char_code)) || |
|
234 keep_line (Scan.one (fn s => s <> q andalso s <> "\\" andalso Symbol.is_regular s)); |
|
235 |
239 |
236 fun scan_strs q = |
240 fun scan_strs q = |
237 keep_line ($$ q) |-- |
241 count ($$ q) |-- |
238 !!! "missing quote at end of string" |
242 !!! "missing quote at end of string" |
239 (change_prompt ((Scan.repeat (scan_str q) >> implode) --| keep_line ($$ q))); |
243 (change_prompt ((Scan.repeat (scan_str q) >> implode) --| count ($$ q))); |
240 |
244 |
241 in |
245 in |
242 |
246 |
243 val scan_string = scan_strs "\""; |
247 val scan_string = scan_strs "\""; |
244 val scan_alt_string = scan_strs "`"; |
248 val scan_alt_string = scan_strs "`"; |
247 |
251 |
248 |
252 |
249 (* scan verbatim text *) |
253 (* scan verbatim text *) |
250 |
254 |
251 val scan_verb = |
255 val scan_verb = |
252 scan_blank || |
256 count ($$ "*" --| Scan.ahead (~$$ "}")) || |
253 keep_line ($$ "*" --| Scan.ahead (~$$ "}")) || |
257 count (Scan.one (fn s => s <> "*" andalso Symbol.is_regular s)); |
254 keep_line (Scan.one (fn s => s <> "*" andalso Symbol.is_regular s)); |
|
255 |
258 |
256 val scan_verbatim = |
259 val scan_verbatim = |
257 keep_line ($$ "{" -- $$ "*") |-- |
260 count ($$ "{") |-- count ($$ "*") |-- |
258 !!! "missing end of verbatim text" |
261 !!! "missing end of verbatim text" |
259 (change_prompt ((Scan.repeat scan_verb >> implode) --| keep_line ($$ "*" -- $$ "}"))); |
262 (change_prompt ((Scan.repeat scan_verb >> implode) --| count ($$ "*") --| count ($$ "}"))); |
260 |
263 |
261 |
264 |
262 (* scan space *) |
265 (* scan space *) |
263 |
266 |
264 fun is_space s = Symbol.is_blank s andalso s <> "\n"; |
267 fun is_space s = Symbol.is_blank s andalso s <> "\n"; |
265 |
268 |
266 val scan_space = |
269 val scan_space = |
267 (keep_line (Scan.many1 is_space) -- Scan.optional (incr_line ($$ "\n")) "" || |
270 (Scan.many1 is_space @@@ Scan.optional ($$ "\n" >> single) [] || |
268 keep_line (Scan.many is_space) -- incr_line ($$ "\n")) >> (fn (cs, c) => implode cs ^ c); |
271 Scan.many is_space @@@ ($$ "\n" >> single)); |
269 |
272 |
270 |
273 |
271 (* scan nested comments *) |
274 (* scan nested comments *) |
272 |
275 |
273 val scan_cmt = |
276 val scan_cmt = |
274 Scan.lift scan_blank || |
277 Scan.depend (fn d => count ($$ "(") ^^ count ($$ "*") >> pair (d + 1)) || |
275 Scan.depend (fn d => keep_line ($$ "(" ^^ $$ "*") >> pair (d + 1)) || |
278 Scan.depend (fn 0 => Scan.fail | d => count ($$ "*") ^^ count ($$ ")") >> pair (d - 1)) || |
276 Scan.depend (fn 0 => Scan.fail | d => keep_line ($$ "*" ^^ $$ ")") >> pair (d - 1)) || |
279 Scan.lift (count ($$ "*" --| Scan.ahead (~$$ ")"))) || |
277 Scan.lift (keep_line ($$ "*" --| Scan.ahead (~$$ ")"))) || |
280 Scan.lift (count (Scan.one (fn s => s <> "*" andalso Symbol.is_regular s))); |
278 Scan.lift (keep_line (Scan.one (fn s => s <> "*" andalso Symbol.is_regular s))); |
|
279 |
281 |
280 val scan_comment = |
282 val scan_comment = |
281 keep_line ($$ "(" -- $$ "*") |-- |
283 count ($$ "(") |-- count ($$ "*") |-- |
282 !!! "missing end of comment" |
284 !!! "missing end of comment" |
283 (change_prompt |
285 (change_prompt |
284 (Scan.pass 0 (Scan.repeat scan_cmt >> implode) --| keep_line ($$ "*" -- $$ ")"))); |
286 (Scan.pass 0 (Scan.repeat scan_cmt >> implode) --| count ($$ "*") --| count ($$ ")"))); |
285 |
287 |
286 |
288 |
287 (* scan malformed symbols *) |
289 (* scan malformed symbols *) |
288 |
290 |
289 local |
|
290 |
|
291 val scan_mal = |
|
292 scan_blank || |
|
293 keep_line (Scan.one Symbol.is_regular); |
|
294 |
|
295 in |
|
296 |
|
297 val scan_malformed = |
291 val scan_malformed = |
298 keep_line ($$ Symbol.malformed) |-- |
292 $$ Symbol.malformed |-- |
299 change_prompt (Scan.repeat scan_mal >> implode) |
293 change_prompt (Scan.many Symbol.is_regular) |
300 --| keep_line (Scan.option ($$ Symbol.end_malformed)); |
294 --| Scan.option ($$ Symbol.end_malformed); |
301 |
|
302 end; |
|
303 |
295 |
304 |
296 |
305 (* scan token *) |
297 (* scan token *) |
306 |
298 |
307 fun scan (lex1, lex2) = |
299 fun scan (lex1, lex2) = |
312 fun sync _ = token Sync Symbol.sync; |
304 fun sync _ = token Sync Symbol.sync; |
313 in |
305 in |
314 scan_string >> token String || |
306 scan_string >> token String || |
315 scan_alt_string >> token AltString || |
307 scan_alt_string >> token AltString || |
316 scan_verbatim >> token Verbatim || |
308 scan_verbatim >> token Verbatim || |
317 scan_space >> token Space || |
|
318 scan_comment >> token Comment || |
309 scan_comment >> token Comment || |
319 scan_malformed >> token Malformed || |
310 counted scan_space >> token Space || |
320 keep_line (Scan.one Symbol.is_sync >> sync) || |
311 counted scan_malformed >> token Malformed || |
321 keep_line (Scan.max token_leq |
312 Scan.lift (Scan.one Symbol.is_sync >> sync) || |
|
313 (Scan.max token_leq |
322 (Scan.max token_leq |
314 (Scan.max token_leq |
323 (Scan.literal lex1 >> (token Keyword o implode)) |
315 (counted (Scan.literal lex1) >> token Keyword) |
324 (Scan.literal lex2 >> (token Command o implode))) |
316 (counted (Scan.literal lex2) >> token Command)) |
325 (Syntax.scan_longid >> token LongIdent || |
317 (counted Syntax.scan_longid >> token LongIdent || |
326 Syntax.scan_id >> token Ident || |
318 counted Syntax.scan_id >> token Ident || |
327 Syntax.scan_var >> token Var || |
319 counted Syntax.scan_var >> token Var || |
328 Syntax.scan_tid >> token TypeIdent || |
320 counted Syntax.scan_tid >> token TypeIdent || |
329 Syntax.scan_tvar >> token TypeVar || |
321 counted Syntax.scan_tvar >> token TypeVar || |
330 Syntax.scan_nat >> token Nat || |
322 counted Syntax.scan_nat >> token Nat || |
331 scan_symid >> token SymIdent)) |
323 counted scan_symid >> token SymIdent)) |
332 end); |
324 end); |
333 in !! (lex_err (fn cs => "bad input " ^ quote (Symbol.beginning 10 cs))) scanner end; |
325 in !! (lex_err (fn cs => "bad input " ^ quote (Symbol.beginning 10 cs))) scanner end; |
334 |
326 |
335 |
327 |
336 (* token sources *) |
328 (* token sources *) |
338 local |
330 local |
339 |
331 |
340 val is_junk = (not o Symbol.is_blank) andf Symbol.is_regular; |
332 val is_junk = (not o Symbol.is_blank) andf Symbol.is_regular; |
341 |
333 |
342 fun recover msg = Scan.state :|-- (fn pos => |
334 fun recover msg = Scan.state :|-- (fn pos => |
343 keep_line (Scan.many is_junk) >> (fn xs => [Token (pos, (Error msg, implode xs))])); |
335 counted (Scan.many is_junk) >> (fn s => [Token (pos, (Error msg, s))])); |
344 |
336 |
345 in |
337 in |
346 |
338 |
347 fun source do_recover get_lex pos src = |
339 fun source do_recover get_lex pos src = |
348 Source.source' pos Symbol.stopper (Scan.bulk (fn xs => scan (get_lex ()) xs)) |
340 Source.source' pos Symbol.stopper (Scan.bulk (fn xs => scan (get_lex ()) xs)) |