5825
|
1 |
(* Title: Pure/Isar/outer_lex.ML
|
|
2 |
ID: $Id$
|
|
3 |
Author: Markus Wenzel, TU Muenchen
|
|
4 |
|
|
5 |
Outer lexical syntax for Isabelle/Isar.
|
|
6 |
*)
|
|
7 |
|
|
8 |
signature OUTER_LEX =
|
|
9 |
sig
|
|
10 |
datatype token_kind =
|
|
11 |
Keyword | Ident | LongIdent | SymIdent | Var | TextVar | TypeIdent | TypeVar | Nat |
|
|
12 |
String | Verbatim | Ignore | EOF
|
|
13 |
type token
|
|
14 |
val str_of_kind: token_kind -> string
|
|
15 |
val stopper: token * (token -> bool)
|
|
16 |
val not_eof: token -> bool
|
|
17 |
val position_of: token -> Position.T
|
|
18 |
val pos_of: token -> string
|
|
19 |
val is_kind: token_kind -> token -> bool
|
|
20 |
val keyword_pred: (string -> bool) -> token -> bool
|
|
21 |
val name_of: token -> string
|
|
22 |
val is_proper: token -> bool
|
|
23 |
val val_of: token -> string
|
5876
|
24 |
val is_sid: string -> bool
|
5825
|
25 |
val scan: Scan.lexicon ->
|
|
26 |
Position.T * Symbol.symbol list -> token * (Position.T * Symbol.symbol list)
|
|
27 |
val source: bool -> (unit -> Scan.lexicon) -> Position.T -> (Symbol.symbol, 'a) Source.source ->
|
|
28 |
(token, (token, Position.T * (Symbol.symbol, 'a) Source.source) Source.source) Source.source
|
|
29 |
end;
|
|
30 |
|
|
31 |
structure OuterLex: OUTER_LEX =
|
|
32 |
struct
|
|
33 |
|
|
34 |
|
|
35 |
(** tokens **)
|
|
36 |
|
|
37 |
(* datatype token *)
|
|
38 |
|
|
39 |
datatype token_kind =
|
|
40 |
Keyword | Ident | LongIdent | SymIdent | Var | TextVar | TypeIdent | TypeVar | Nat |
|
|
41 |
String | Verbatim | Ignore | EOF;
|
|
42 |
|
|
43 |
datatype token = Token of Position.T * (token_kind * string);
|
|
44 |
|
|
45 |
val str_of_kind =
|
|
46 |
fn Keyword => "keyword"
|
|
47 |
| Ident => "identifier"
|
|
48 |
| LongIdent => "long identifier"
|
|
49 |
| SymIdent => "symbolic identifier"
|
|
50 |
| Var => "schematic variable"
|
|
51 |
| TextVar => "text variable"
|
|
52 |
| TypeIdent => "type variable"
|
|
53 |
| TypeVar => "schematic type variable"
|
|
54 |
| Nat => "number"
|
|
55 |
| String => "string"
|
|
56 |
| Verbatim => "verbatim text"
|
|
57 |
| Ignore => "ignored text"
|
|
58 |
| EOF => "end-of-file";
|
|
59 |
|
|
60 |
|
|
61 |
(* eof token *)
|
|
62 |
|
|
63 |
val eof = Token (Position.none, (EOF, ""));
|
|
64 |
|
|
65 |
fun is_eof (Token (_, (EOF, _))) = true
|
|
66 |
| is_eof _ = false;
|
|
67 |
|
|
68 |
val stopper = (eof, is_eof);
|
|
69 |
val not_eof = not o is_eof;
|
|
70 |
|
|
71 |
|
|
72 |
(* get position *)
|
|
73 |
|
|
74 |
fun position_of (Token (pos, _)) = pos;
|
|
75 |
val pos_of = Position.str_of o position_of;
|
|
76 |
|
|
77 |
|
|
78 |
(* kind of token *)
|
|
79 |
|
|
80 |
fun is_kind k (Token (_, (k', _))) = k = k';
|
|
81 |
|
|
82 |
fun keyword_pred pred (Token (_, (Keyword, x))) = pred x
|
|
83 |
| keyword_pred _ _ = false;
|
|
84 |
|
|
85 |
fun name_of (Token (_, (k, _))) = str_of_kind k;
|
|
86 |
|
|
87 |
fun is_proper (Token (_, (Ignore, _))) = false
|
|
88 |
| is_proper _ = true;
|
|
89 |
|
|
90 |
|
|
91 |
(* value of token *)
|
|
92 |
|
|
93 |
fun val_of (Token (_, (_, x))) = x;
|
|
94 |
|
|
95 |
fun token_leq (Token (_, (_, x)), Token (_, (_, x'))) = x <= x';
|
|
96 |
|
|
97 |
|
|
98 |
|
|
99 |
(** scanners **)
|
|
100 |
|
|
101 |
fun change_prompt scan = Scan.prompt "# " scan;
|
|
102 |
|
|
103 |
|
|
104 |
(* diagnostics *)
|
|
105 |
|
|
106 |
fun lex_err msg ((pos, cs), _) = "Outer lexical error" ^ Position.str_of pos ^ ": " ^ msg cs;
|
|
107 |
|
|
108 |
|
|
109 |
(* line numbering *)
|
|
110 |
|
|
111 |
fun incr_line scan = Scan.depend (fn pos => scan >> pair (Position.inc pos));
|
|
112 |
val keep_line = Scan.lift;
|
|
113 |
|
|
114 |
val scan_blank =
|
|
115 |
incr_line ($$ "\n") ||
|
|
116 |
keep_line (Scan.one Symbol.is_blank);
|
|
117 |
|
|
118 |
|
|
119 |
(* scan symbolic idents *)
|
|
120 |
|
|
121 |
val sym_chars = explode "!#$%&*+-/:<=>?@^_`|~";
|
|
122 |
fun is_sym_char s = s mem sym_chars;
|
|
123 |
|
5876
|
124 |
val scan_symid = Scan.any1 is_sym_char >> implode;
|
5825
|
125 |
|
5876
|
126 |
fun is_symid s = s <> "" andalso forall is_sym_char (Symbol.explode s);
|
|
127 |
val is_sid = is_symid orf Syntax.is_identifier;
|
5825
|
128 |
|
|
129 |
|
|
130 |
(* scan strings *)
|
|
131 |
|
|
132 |
val scan_str =
|
|
133 |
scan_blank >> K Symbol.space ||
|
|
134 |
keep_line ($$ "\\" |-- Scan.one Symbol.not_eof) ||
|
|
135 |
keep_line (Scan.one (not_equal "\\" andf not_equal "\"" andf Symbol.not_eof));
|
|
136 |
|
|
137 |
val scan_string =
|
|
138 |
keep_line ($$ "\"") |--
|
|
139 |
!! (lex_err (K "missing quote at end of string"))
|
|
140 |
(change_prompt ((Scan.repeat scan_str >> implode) --| keep_line ($$ "\"")));
|
|
141 |
|
|
142 |
|
|
143 |
(* scan verbatim text *)
|
|
144 |
|
|
145 |
val scan_verb =
|
|
146 |
scan_blank ||
|
|
147 |
keep_line ($$ "|" --| Scan.ahead (Scan.one (not_equal "}"))) ||
|
|
148 |
keep_line (Scan.one (not_equal "|" andf Symbol.not_eof));
|
|
149 |
|
|
150 |
val scan_verbatim =
|
|
151 |
keep_line ($$ "{" -- $$ "|") |--
|
|
152 |
!! (lex_err (K "missing end of verbatim text"))
|
|
153 |
(change_prompt ((Scan.repeat scan_verb >> implode) --| keep_line ($$ "|" -- $$ "}")));
|
|
154 |
|
|
155 |
|
|
156 |
(* scan space *)
|
|
157 |
|
|
158 |
val is_space = Symbol.is_blank andf not_equal "\n";
|
|
159 |
|
|
160 |
val scan_space =
|
|
161 |
keep_line (Scan.any1 is_space) |-- Scan.optional (incr_line ($$ "\n")) "" ||
|
|
162 |
keep_line (Scan.any is_space) |-- incr_line ($$ "\n");
|
|
163 |
|
|
164 |
|
|
165 |
(* scan nested comments *)
|
|
166 |
|
|
167 |
val scan_cmt =
|
|
168 |
Scan.lift scan_blank ||
|
|
169 |
Scan.depend (fn d => keep_line ($$ "(" ^^ $$ "*") >> pair (d + 1)) ||
|
|
170 |
Scan.depend (fn 0 => Scan.fail | d => keep_line ($$ "*" ^^ $$ ")") >> pair (d - 1)) ||
|
|
171 |
Scan.lift (keep_line ($$ "*" --| Scan.ahead (Scan.one (not_equal ")")))) ||
|
|
172 |
Scan.lift (keep_line (Scan.one (not_equal "*" andf Symbol.not_eof)));
|
|
173 |
|
|
174 |
val scan_comment =
|
|
175 |
keep_line ($$ "(" -- $$ "*") |--
|
|
176 |
!! (lex_err (K "missing end of comment"))
|
|
177 |
(change_prompt
|
|
178 |
(Scan.pass 0 (Scan.repeat scan_cmt) |-- keep_line ($$ "*" -- $$ ")") >> K ""));
|
|
179 |
|
|
180 |
|
|
181 |
(* scan token *)
|
|
182 |
|
|
183 |
fun scan lex (pos, cs) =
|
|
184 |
let
|
|
185 |
fun token k x = Token (pos, (k, x));
|
|
186 |
fun ignore _ = token Ignore "";
|
|
187 |
|
|
188 |
val scanner =
|
|
189 |
scan_string >> token String ||
|
|
190 |
scan_verbatim >> token Verbatim ||
|
|
191 |
scan_space >> ignore ||
|
|
192 |
scan_comment >> ignore ||
|
|
193 |
keep_line (Scan.max token_leq
|
|
194 |
(Scan.literal lex >> (token Keyword o implode))
|
|
195 |
(Syntax.scan_longid >> token LongIdent ||
|
|
196 |
Syntax.scan_id >> token Ident ||
|
|
197 |
Syntax.scan_var >> token Var ||
|
|
198 |
$$ "?" ^^ $$ "?" ^^ Syntax.scan_id >> token TextVar ||
|
|
199 |
Syntax.scan_tid >> token TypeIdent ||
|
|
200 |
Syntax.scan_tvar >> token TypeVar ||
|
|
201 |
Syntax.scan_nat >> token Nat ||
|
|
202 |
scan_symid >> token SymIdent));
|
|
203 |
in
|
|
204 |
!! (lex_err (fn cs => "bad input " ^ quote (Symbol.beginning cs))) scanner (pos, cs)
|
|
205 |
end;
|
|
206 |
|
|
207 |
|
|
208 |
(* source of (proper) tokens *)
|
|
209 |
|
|
210 |
fun recover xs = keep_line (Scan.any1 ((not o Symbol.is_blank) andf Symbol.not_eof)) xs;
|
|
211 |
|
|
212 |
fun source do_recover get_lex pos src =
|
|
213 |
Source.source' pos Symbol.stopper (Scan.bulk (fn xs => scan (get_lex ()) xs))
|
|
214 |
(if do_recover then Some recover else None) src
|
|
215 |
|> Source.filter is_proper;
|
|
216 |
|
|
217 |
|
|
218 |
end;
|