7 |
7 |
8 signature OUTER_LEX = |
8 signature OUTER_LEX = |
9 sig |
9 sig |
10 datatype token_kind = |
10 datatype token_kind = |
11 Keyword | Ident | LongIdent | SymIdent | Var | TextVar | TypeIdent | TypeVar | Nat | |
11 Keyword | Ident | LongIdent | SymIdent | Var | TextVar | TypeIdent | TypeVar | Nat | |
12 String | Verbatim | Ignore | EOF |
12 String | Verbatim | Ignore | Sync | EOF |
13 type token |
13 type token |
14 val str_of_kind: token_kind -> string |
14 val str_of_kind: token_kind -> string |
15 val stopper: token * (token -> bool) |
15 val stopper: token * (token -> bool) |
|
16 val not_sync: token -> bool |
16 val not_eof: token -> bool |
17 val not_eof: token -> bool |
17 val position_of: token -> Position.T |
18 val position_of: token -> Position.T |
18 val pos_of: token -> string |
19 val pos_of: token -> string |
19 val is_kind: token_kind -> token -> bool |
20 val is_kind: token_kind -> token -> bool |
20 val keyword_pred: (string -> bool) -> token -> bool |
21 val keyword_pred: (string -> bool) -> token -> bool |
53 | TypeVar => "schematic type variable" |
54 | TypeVar => "schematic type variable" |
54 | Nat => "number" |
55 | Nat => "number" |
55 | String => "string" |
56 | String => "string" |
56 | Verbatim => "verbatim text" |
57 | Verbatim => "verbatim text" |
57 | Ignore => "ignored text" |
58 | Ignore => "ignored text" |
|
59 | Sync => "sync marker" |
58 | EOF => "end-of-file"; |
60 | EOF => "end-of-file"; |
|
61 |
|
62 |
|
63 (* sync token *) |
|
64 |
|
65 fun not_sync (Token (_, (Sync, _))) = false |
|
66 | not_sync _ = true; |
59 |
67 |
60 |
68 |
61 (* eof token *) |
69 (* eof token *) |
62 |
70 |
63 val eof = Token (Position.none, (EOF, "")); |
71 val eof = Token (Position.none, (EOF, "")); |
129 |
137 |
130 (* scan strings *) |
138 (* scan strings *) |
131 |
139 |
132 val scan_str = |
140 val scan_str = |
133 scan_blank >> K Symbol.space || |
141 scan_blank >> K Symbol.space || |
134 keep_line ($$ "\\" |-- Scan.one Symbol.not_eof) || |
142 keep_line ($$ "\\" |-- Scan.one (Symbol.not_sync andf Symbol.not_eof)) || |
135 keep_line (Scan.one (not_equal "\\" andf not_equal "\"" andf Symbol.not_eof)); |
143 keep_line (Scan.one (not_equal "\\" andf not_equal "\"" andf |
|
144 Symbol.not_sync andf Symbol.not_eof)); |
136 |
145 |
137 val scan_string = |
146 val scan_string = |
138 keep_line ($$ "\"") |-- |
147 keep_line ($$ "\"") |-- |
139 !! (lex_err (K "missing quote at end of string")) |
148 !! (lex_err (K "missing quote at end of string")) |
140 (change_prompt ((Scan.repeat scan_str >> implode) --| keep_line ($$ "\""))); |
149 (change_prompt ((Scan.repeat scan_str >> implode) --| keep_line ($$ "\""))); |
143 (* scan verbatim text *) |
152 (* scan verbatim text *) |
144 |
153 |
145 val scan_verb = |
154 val scan_verb = |
146 scan_blank || |
155 scan_blank || |
147 keep_line ($$ "*" --| Scan.ahead (Scan.one (not_equal "}"))) || |
156 keep_line ($$ "*" --| Scan.ahead (Scan.one (not_equal "}"))) || |
148 keep_line (Scan.one (not_equal "*" andf Symbol.not_eof)); |
157 keep_line (Scan.one (not_equal "*" andf Symbol.not_sync andf Symbol.not_eof)); |
149 |
158 |
150 val scan_verbatim = |
159 val scan_verbatim = |
151 keep_line ($$ "{" -- $$ "*") |-- |
160 keep_line ($$ "{" -- $$ "*") |-- |
152 !! (lex_err (K "missing end of verbatim text")) |
161 !! (lex_err (K "missing end of verbatim text")) |
153 (change_prompt ((Scan.repeat scan_verb >> implode) --| keep_line ($$ "*" -- $$ "}"))); |
162 (change_prompt ((Scan.repeat scan_verb >> implode) --| keep_line ($$ "*" -- $$ "}"))); |
167 val scan_cmt = |
176 val scan_cmt = |
168 Scan.lift scan_blank || |
177 Scan.lift scan_blank || |
169 Scan.depend (fn d => keep_line ($$ "(" ^^ $$ "*") >> pair (d + 1)) || |
178 Scan.depend (fn d => keep_line ($$ "(" ^^ $$ "*") >> pair (d + 1)) || |
170 Scan.depend (fn 0 => Scan.fail | d => keep_line ($$ "*" ^^ $$ ")") >> pair (d - 1)) || |
179 Scan.depend (fn 0 => Scan.fail | d => keep_line ($$ "*" ^^ $$ ")") >> pair (d - 1)) || |
171 Scan.lift (keep_line ($$ "*" --| Scan.ahead (Scan.one (not_equal ")")))) || |
180 Scan.lift (keep_line ($$ "*" --| Scan.ahead (Scan.one (not_equal ")")))) || |
172 Scan.lift (keep_line (Scan.one (not_equal "*" andf Symbol.not_eof))); |
181 Scan.lift (keep_line (Scan.one (not_equal "*" andf Symbol.not_sync andf Symbol.not_eof))); |
173 |
182 |
174 val scan_comment = |
183 val scan_comment = |
175 keep_line ($$ "(" -- $$ "*") |-- |
184 keep_line ($$ "(" -- $$ "*") |-- |
176 !! (lex_err (K "missing end of comment")) |
185 !! (lex_err (K "missing end of comment")) |
177 (change_prompt |
186 (change_prompt |
182 |
191 |
183 fun scan lex (pos, cs) = |
192 fun scan lex (pos, cs) = |
184 let |
193 let |
185 fun token k x = Token (pos, (k, x)); |
194 fun token k x = Token (pos, (k, x)); |
186 fun ignore _ = token Ignore ""; |
195 fun ignore _ = token Ignore ""; |
|
196 fun sync _ = token Sync Symbol.sync; |
187 |
197 |
188 val scanner = |
198 val scanner = |
189 scan_string >> token String || |
199 scan_string >> token String || |
190 scan_verbatim >> token Verbatim || |
200 scan_verbatim >> token Verbatim || |
191 scan_space >> ignore || |
201 scan_space >> ignore || |
192 scan_comment >> ignore || |
202 scan_comment >> ignore || |
|
203 keep_line (Scan.one Symbol.is_sync >> sync) || |
193 keep_line (Scan.max token_leq |
204 keep_line (Scan.max token_leq |
194 (Scan.literal lex >> (token Keyword o implode)) |
205 (Scan.literal lex >> (token Keyword o implode)) |
195 (Syntax.scan_longid >> token LongIdent || |
206 (Syntax.scan_longid >> token LongIdent || |
196 Syntax.scan_id >> token Ident || |
207 Syntax.scan_id >> token Ident || |
197 Syntax.scan_var >> token Var || |
208 Syntax.scan_var >> token Var || |
198 $$ "?" ^^ $$ "?" ^^ Syntax.scan_id >> token TextVar || |
209 $$ "?" ^^ $$ "?" ^^ Syntax.scan_id >> token TextVar || |
199 Syntax.scan_tid >> token TypeIdent || |
210 Syntax.scan_tid >> token TypeIdent || |
200 Syntax.scan_tvar >> token TypeVar || |
211 Syntax.scan_tvar >> token TypeVar || |
201 Syntax.scan_nat >> token Nat || |
212 Syntax.scan_nat >> token Nat || |
202 scan_symid >> token SymIdent)); |
213 scan_symid >> token SymIdent)); |
203 in |
214 in !! (lex_err (fn cs => "bad input " ^ quote (Symbol.beginning cs))) scanner (pos, cs) end; |
204 !! (lex_err (fn cs => "bad input " ^ quote (Symbol.beginning cs))) scanner (pos, cs) |
|
205 end; |
|
206 |
215 |
207 |
216 |
208 (* source of (proper) tokens *) |
217 (* source of (proper) tokens *) |
209 |
218 |
210 fun recover xs = keep_line (Scan.any1 ((not o Symbol.is_blank) andf Symbol.not_eof)) xs; |
219 val is_junk = (not o Symbol.is_blank) andf Symbol.not_sync andf Symbol.not_eof; |
|
220 fun recover xs = keep_line (Scan.any1 is_junk) xs; |
211 |
221 |
212 fun source do_recover get_lex pos src = |
222 fun source do_recover get_lex pos src = |
213 Source.source' pos Symbol.stopper (Scan.bulk (fn xs => scan (get_lex ()) xs)) |
223 Source.source' pos Symbol.stopper (Scan.bulk (fn xs => scan (get_lex ()) xs)) |
214 (if do_recover then Some recover else None) src |
224 (if do_recover then Some recover else None) src |
215 |> Source.filter is_proper; |
225 |> Source.filter is_proper; |