10 datatype token_kind = |
10 datatype token_kind = |
11 Command | Keyword | Ident | LongIdent | SymIdent | Var | TypeIdent | TypeVar | Nat | |
11 Command | Keyword | Ident | LongIdent | SymIdent | Var | TypeIdent | TypeVar | Nat | |
12 String | AltString | Verbatim | Space | Comment | Malformed | Error of string | Sync | EOF |
12 String | AltString | Verbatim | Space | Comment | Malformed | Error of string | Sync | EOF |
13 eqtype token |
13 eqtype token |
14 val str_of_kind: token_kind -> string |
14 val str_of_kind: token_kind -> string |
15 val range_of: token -> Position.range |
|
16 val position_of: token -> Position.T |
15 val position_of: token -> Position.T |
|
16 val end_position_of: token -> Position.T |
17 val pos_of: token -> string |
17 val pos_of: token -> string |
18 val eof: token |
18 val eof: token |
19 val is_eof: token -> bool |
19 val is_eof: token -> bool |
20 val not_eof: token -> bool |
20 val not_eof: token -> bool |
21 val not_sync: token -> bool |
21 val not_sync: token -> bool |
30 val is_begin_ignore: token -> bool |
30 val is_begin_ignore: token -> bool |
31 val is_end_ignore: token -> bool |
31 val is_end_ignore: token -> bool |
32 val is_blank: token -> bool |
32 val is_blank: token -> bool |
33 val is_newline: token -> bool |
33 val is_newline: token -> bool |
34 val val_of: token -> string |
34 val val_of: token -> string |
35 val clean_value: Symbol.symbol list -> Symbol.symbol list |
|
36 val source_of: token -> string |
35 val source_of: token -> string |
37 val unparse: token -> string |
36 val unparse: token -> string |
38 val text_of: token -> string * string |
37 val text_of: token -> string * string |
39 val is_sid: string -> bool |
38 val is_sid: string -> bool |
40 val !!! : string -> (Position.T * 'a -> 'b) -> Position.T * 'a -> 'b |
39 val !!! : string -> (Position.T * 'a -> 'b) -> Position.T * 'a -> 'b |
41 val scan_string: Position.T * Symbol.symbol list -> |
40 val scan_quoted: Position.T * Symbol.symbol list -> |
42 Symbol.symbol list * (Position.T * Symbol.symbol list) |
41 Symbol.symbol list * (Position.T * Symbol.symbol list) |
43 val scan: (Scan.lexicon * Scan.lexicon) -> |
42 val scan: (Scan.lexicon * Scan.lexicon) -> |
44 Position.T * Symbol.symbol list -> token * (Position.T * Symbol.symbol list) |
43 Position.T * Symbol.symbol list -> token * (Position.T * Symbol.symbol list) |
45 val source: bool option -> (unit -> Scan.lexicon * Scan.lexicon) -> |
44 val source: bool option -> (unit -> Scan.lexicon * Scan.lexicon) -> |
46 Position.T -> (Symbol.symbol, 'a) Source.source -> |
45 Position.T -> (Symbol.symbol, 'a) Source.source -> |
84 | EOF => "end-of-file"; |
83 | EOF => "end-of-file"; |
85 |
84 |
86 |
85 |
87 (* position *) |
86 (* position *) |
88 |
87 |
89 fun range_of (Token ((range, _), _)) = range; |
88 fun position_of (Token (((pos, _), _), _)) = pos; |
90 |
89 fun end_position_of (Token (((_, pos), _), _)) = pos; |
91 val position_of = #1 o range_of; |
90 |
92 val pos_of = Position.str_of o position_of; |
91 val pos_of = Position.str_of o position_of; |
93 |
92 |
94 |
93 |
95 (* control tokens *) |
94 (* control tokens *) |
96 |
95 |
103 val not_eof = not o is_eof; |
102 val not_eof = not o is_eof; |
104 |
103 |
105 fun not_sync (Token (_, (Sync, _))) = false |
104 fun not_sync (Token (_, (Sync, _))) = false |
106 | not_sync _ = true; |
105 | not_sync _ = true; |
107 |
106 |
108 val stopper = Scan.stopper (fn [] => eof | toks => mk_eof (#2 (range_of (List.last toks)))) is_eof; |
107 val stopper = |
|
108 Scan.stopper (fn [] => eof | toks => mk_eof (end_position_of (List.last toks))) is_eof; |
109 |
109 |
110 |
110 |
111 (* kind of token *) |
111 (* kind of token *) |
112 |
112 |
113 fun kind_of (Token (_, (k, _))) = k; |
113 fun kind_of (Token (_, (k, _))) = k; |
147 |
147 |
148 |
148 |
149 (* token content *) |
149 (* token content *) |
150 |
150 |
151 fun val_of (Token (_, (_, x))) = x; |
151 fun val_of (Token (_, (_, x))) = x; |
152 |
|
153 val clean_value = filter_out (fn s => s = Symbol.DEL); |
|
154 |
152 |
155 fun source_of (Token ((range, src), _)) = |
153 fun source_of (Token ((range, src), _)) = |
156 XML.Elem (Markup.tokenN, Position.properties_of (Position.encode_range range), [XML.Text src]) |
154 XML.Elem (Markup.tokenN, Position.properties_of (Position.encode_range range), [XML.Text src]) |
157 |> YXML.string_of; |
155 |> YXML.string_of; |
158 |
156 |
244 in |
242 in |
245 |
243 |
246 val scan_string = scan_strs "\""; |
244 val scan_string = scan_strs "\""; |
247 val scan_alt_string = scan_strs "`"; |
245 val scan_alt_string = scan_strs "`"; |
248 |
246 |
|
247 val scan_quoted = Scan.depend (fn pos => |
|
248 Scan.trace (Scan.pass pos (scan_string || scan_alt_string)) |
|
249 >> (fn (_, syms) => (Position.advance syms pos, syms))); |
|
250 |
249 end; |
251 end; |
250 |
252 |
251 |
253 |
252 (* scan verbatim text *) |
254 (* scan verbatim text *) |
253 |
255 |
295 |
297 |
296 (* scan token *) |
298 (* scan token *) |
297 |
299 |
298 fun token_leq ((_, syms1), (_, syms2)) = length syms1 <= length syms2; |
300 fun token_leq ((_, syms1), (_, syms2)) = length syms1 <= length syms2; |
299 |
301 |
|
302 fun advance_range syms pos = |
|
303 let val pos' = Position.advance syms pos |
|
304 in (Position.encode_range (pos, pos'), pos') end; |
|
305 |
|
306 val clean_value = implode o filter_out (fn s => s = Symbol.DEL); |
|
307 |
|
308 |
300 fun scan (lex1, lex2) = |
309 fun scan (lex1, lex2) = |
301 let |
310 let |
302 val scanner = |
311 val scanner = Scan.depend (fn pos => Scan.pass pos |
303 (scan_string >> pair String || |
312 (scan_string >> pair String || |
304 scan_alt_string >> pair AltString || |
313 scan_alt_string >> pair AltString || |
305 scan_verbatim >> pair Verbatim || |
314 scan_verbatim >> pair Verbatim || |
306 scan_comment >> pair Comment || |
315 scan_comment >> pair Comment || |
307 Scan.lift scan_space >> pair Space || |
316 Scan.lift scan_space >> pair Space || |
315 Syntax.scan_id >> pair Ident || |
324 Syntax.scan_id >> pair Ident || |
316 Syntax.scan_var >> pair Var || |
325 Syntax.scan_var >> pair Var || |
317 Syntax.scan_tid >> pair TypeIdent || |
326 Syntax.scan_tid >> pair TypeIdent || |
318 Syntax.scan_tvar >> pair TypeVar || |
327 Syntax.scan_tvar >> pair TypeVar || |
319 Syntax.scan_nat >> pair Nat || |
328 Syntax.scan_nat >> pair Nat || |
320 scan_symid >> pair SymIdent)))) :|-- |
329 scan_symid >> pair SymIdent)))) >> |
321 (fn (k, syms) => Scan.depend (fn pos => |
330 (fn (k, syms) => |
322 let |
331 let val (range_pos, pos') = advance_range syms pos |
323 val pos' = Position.advance syms pos; |
332 in (pos', Token (((range_pos, pos'), implode syms), (k, clean_value syms))) end)); |
324 val x = implode (clean_value syms); |
|
325 in Scan.succeed (pos', Token (((pos, pos'), implode syms), (k, x))) end)); |
|
326 |
333 |
327 in !! (lex_err (fn cs => "bad input " ^ quote (Symbol.beginning 10 cs))) scanner end; |
334 in !! (lex_err (fn cs => "bad input " ^ quote (Symbol.beginning 10 cs))) scanner end; |
328 |
335 |
329 |
336 |
330 (* token sources *) |
337 (* token sources *) |
331 |
338 |
332 local |
339 local |
333 |
340 |
334 val is_junk = (not o Symbol.is_blank) andf Symbol.is_regular; |
341 val is_junk = (not o Symbol.is_blank) andf Symbol.is_regular; |
335 |
342 |
336 fun recover msg = Scan.lift (Scan.many is_junk) :|-- (fn syms => Scan.depend (fn pos => |
343 fun recover msg = Scan.depend (fn pos => Scan.many is_junk >> (fn syms => |
337 let |
344 let val (range_pos, pos') = advance_range syms pos |
338 val pos' = Position.advance syms pos; |
345 in (pos', [Token (((range_pos, pos'), implode syms), (Error msg, clean_value syms))]) end)); |
339 val x = implode (clean_value syms); |
|
340 in Scan.succeed (pos', [Token (((pos, pos'), implode syms), (Error msg, x))]) end)); |
|
341 |
346 |
342 in |
347 in |
343 |
348 |
344 fun source do_recover get_lex pos src = |
349 fun source do_recover get_lex pos src = |
345 Source.source' pos Symbol.stopper (Scan.bulk (fn xs => scan (get_lex ()) xs)) |
350 Source.source' pos Symbol.stopper (Scan.bulk (fn xs => scan (get_lex ()) xs)) |