24 val source: (Symbol.symbol, 'a) Source.source -> |
24 val source: (Symbol.symbol, 'a) Source.source -> |
25 (token, (Symbol_Pos.T, Position.T * (Symbol.symbol, 'a) Source.source) |
25 (token, (Symbol_Pos.T, Position.T * (Symbol.symbol, 'a) Source.source) |
26 Source.source) Source.source |
26 Source.source) Source.source |
27 val tokenize: string -> token list |
27 val tokenize: string -> token list |
28 val read: Position.T -> Symbol_Pos.text -> token Antiquote.antiquote list |
28 val read: Position.T -> Symbol_Pos.text -> token Antiquote.antiquote list |
29 val read_source: Symbol_Pos.source -> token Antiquote.antiquote list |
29 val read_source: bool -> Symbol_Pos.source -> token Antiquote.antiquote list |
30 end; |
30 end; |
31 |
31 |
32 structure ML_Lex: ML_LEX = |
32 structure ML_Lex: ML_LEX = |
33 struct |
33 struct |
34 |
34 |
130 |
130 |
131 (* markup *) |
131 (* markup *) |
132 |
132 |
133 local |
133 local |
134 |
134 |
135 val token_kind_markup = |
135 fun token_kind_markup SML = |
136 fn Keyword => (Markup.empty, "") |
136 fn Keyword => (Markup.empty, "") |
137 | Ident => (Markup.empty, "") |
137 | Ident => (Markup.empty, "") |
138 | LongIdent => (Markup.empty, "") |
138 | LongIdent => (Markup.empty, "") |
139 | TypeVar => (Markup.ML_tvar, "") |
139 | TypeVar => (Markup.ML_tvar, "") |
140 | Word => (Markup.ML_numeral, "") |
140 | Word => (Markup.ML_numeral, "") |
141 | Int => (Markup.ML_numeral, "") |
141 | Int => (Markup.ML_numeral, "") |
142 | Real => (Markup.ML_numeral, "") |
142 | Real => (Markup.ML_numeral, "") |
143 | Char => (Markup.ML_char, "") |
143 | Char => (Markup.ML_char, "") |
144 | String => (Markup.ML_string, "") |
144 | String => (if SML then Markup.SML_string else Markup.ML_string, "") |
145 | Space => (Markup.empty, "") |
145 | Space => (Markup.empty, "") |
146 | Comment => (Markup.ML_comment, "") |
146 | Comment => (if SML then Markup.SML_comment else Markup.ML_comment, "") |
147 | Error msg => (Markup.bad, msg) |
147 | Error msg => (Markup.bad, msg) |
148 | EOF => (Markup.empty, ""); |
148 | EOF => (Markup.empty, ""); |
149 |
149 |
150 in |
150 in |
151 |
151 |
152 fun report_of_token (tok as Token ((pos, _), (kind, x))) = |
152 fun report_of_token SML (tok as Token ((pos, _), (kind, x))) = |
153 let |
153 let |
154 val (markup, txt) = |
154 val (markup, txt) = |
155 if not (is_keyword tok) then token_kind_markup kind |
155 if not (is_keyword tok) then token_kind_markup SML kind |
156 else if is_delimiter tok then (Markup.ML_delimiter, "") |
156 else if is_delimiter tok then (Markup.ML_delimiter, "") |
157 else if member (op =) keywords2 x then (Markup.ML_keyword2, "") |
157 else if member (op =) keywords2 x then (Markup.ML_keyword2, "") |
158 else if member (op =) keywords3 x then (Markup.ML_keyword3, "") |
158 else if member (op =) keywords3 x then (Markup.ML_keyword3, "") |
159 else (Markup.ML_keyword1, ""); |
159 else (Markup.ML_keyword1, ""); |
160 in ((pos, markup), txt) end; |
160 in ((pos, markup), txt) end; |
289 recover_string || |
289 recover_string || |
290 Symbol_Pos.recover_comment || |
290 Symbol_Pos.recover_comment || |
291 Scan.one (Symbol.not_eof o Symbol_Pos.symbol) >> single) |
291 Scan.one (Symbol.not_eof o Symbol_Pos.symbol) >> single) |
292 >> (single o token (Error msg)); |
292 >> (single o token (Error msg)); |
293 |
293 |
294 in |
294 fun gen_read SML pos text = |
295 |
|
296 fun source src = |
|
297 Symbol_Pos.source (Position.line 1) src |
|
298 |> Source.source Symbol_Pos.stopper (Scan.bulk (!!! "bad input" scan_ml)) (SOME (false, recover)); |
|
299 |
|
300 val tokenize = Source.of_string #> Symbol.source #> source #> Source.exhaust; |
|
301 |
|
302 fun read pos text = |
|
303 let |
295 let |
304 val syms = Symbol_Pos.explode (text, pos); |
296 val syms = Symbol_Pos.explode (text, pos); |
305 val termination = |
297 val termination = |
306 if null syms then [] |
298 if null syms then [] |
307 else |
299 else |
308 let |
300 let |
309 val pos1 = List.last syms |-> Position.advance; |
301 val pos1 = List.last syms |-> Position.advance; |
310 val pos2 = Position.advance Symbol.space pos1; |
302 val pos2 = Position.advance Symbol.space pos1; |
311 in [Antiquote.Text (Token (Position.range pos1 pos2, (Space, Symbol.space)))] end; |
303 in [Antiquote.Text (Token (Position.range pos1 pos2, (Space, Symbol.space)))] end; |
|
304 |
|
305 val scan = if SML then scan_ml >> Antiquote.Text else scan_antiq; |
|
306 fun check (Antiquote.Text tok) = (check_content_of tok; if SML then () else warn tok) |
|
307 | check _ = (); |
312 val input = |
308 val input = |
313 (Source.of_list syms |
309 Source.of_list syms |
314 |> Source.source Symbol_Pos.stopper (Scan.bulk (!!! "bad input" scan_antiq)) |
310 |> Source.source Symbol_Pos.stopper (Scan.bulk (!!! "bad input" scan)) |
315 (SOME (false, fn msg => recover msg >> map Antiquote.Text)) |
311 (SOME (false, fn msg => recover msg >> map Antiquote.Text)) |
316 |> Source.exhaust |
312 |> Source.exhaust |
317 |> tap (Position.reports_text o Antiquote.antiquote_reports (single o report_of_token)) |
313 |> tap (Position.reports_text o Antiquote.antiquote_reports (single o report_of_token SML)) |
318 |> tap (List.app (fn Antiquote.Text tok => (check_content_of tok; warn tok) | _ => ()))); |
314 |> tap (List.app check); |
319 in input @ termination end; |
315 in input @ termination end; |
320 |
316 |
321 fun read_source {delimited, text, pos} = |
317 in |
322 (Position.report pos (Markup.language_ML delimited); read pos text); |
318 |
323 |
319 fun source src = |
324 end; |
320 Symbol_Pos.source (Position.line 1) src |
325 |
321 |> Source.source Symbol_Pos.stopper (Scan.bulk (!!! "bad input" scan_ml)) (SOME (false, recover)); |
326 end; |
322 |
327 |
323 val tokenize = Source.of_string #> Symbol.source #> source #> Source.exhaust; |
|
324 |
|
325 val read = gen_read false; |
|
326 |
|
327 fun read_source SML {delimited, text, pos} = |
|
328 (Position.report pos ((if SML then Markup.language_SML else Markup.language_ML) delimited); |
|
329 gen_read SML pos text); |
|
330 |
|
331 end; |
|
332 |
|
333 end; |
|
334 |