author | wenzelm |
Thu, 01 Jul 1999 21:20:27 +0200 | |
changeset 6875 | df31250ccb3a |
parent 6859 | 2b3db2b6c129 |
child 7026 | 69724548fad1 |
permissions | -rw-r--r-- |
5825 | 1 |
(*  Title:      Pure/Isar/outer_lex.ML
    ID:         $Id$
    Author:     Markus Wenzel, TU Muenchen

Outer lexical syntax for Isabelle/Isar.
*)
|
7 |
||
8 |
signature OUTER_LEX =
sig
  (*token kinds and the abstract token type*)
  datatype token_kind =
    Keyword | Ident | LongIdent | SymIdent |
    Var | TextVar | TypeIdent | TypeVar | Nat |
    String | Verbatim |
    Ignore | Sync | EOF
  type token
  val str_of_kind: token_kind -> string

  (*end-of-file and sync tokens*)
  val stopper: token * (token -> bool)
  val not_sync: token -> bool
  val not_eof: token -> bool

  (*position, kind and value of a token*)
  val position_of: token -> Position.T
  val pos_of: token -> string
  val is_kind: token_kind -> token -> bool
  val keyword_pred: (string -> bool) -> token -> bool
  val name_of: token -> string
  val is_proper: token -> bool
  val val_of: token -> string

  (*symbolic or plain identifiers*)
  val is_sid: string -> bool

  (*scanning a single token, and sources of tokens*)
  val scan: Scan.lexicon ->
    Position.T * Symbol.symbol list ->
      token * (Position.T * Symbol.symbol list)
  val source: bool -> (unit -> Scan.lexicon) -> Position.T ->
    (Symbol.symbol, 'a) Source.source ->
      (token, (token, Position.T * (Symbol.symbol, 'a) Source.source) Source.source)
        Source.source
end;
|
31 |
||
32 |
structure OuterLex: OUTER_LEX = |
|
33 |
struct |
|
34 |
||
35 |
||
36 |
(** tokens **) |
|
37 |
||
38 |
(* datatype token *) |
|
39 |
||
40 |
(*the kinds of tokens distinguished by the outer lexer*)
datatype token_kind =
    Keyword       (*keyword*)
  | Ident         (*identifier*)
  | LongIdent     (*long identifier*)
  | SymIdent      (*symbolic identifier*)
  | Var           (*schematic variable*)
  | TextVar       (*text variable*)
  | TypeIdent     (*type variable*)
  | TypeVar       (*schematic type variable*)
  | Nat           (*number*)
  | String        (*string*)
  | Verbatim      (*verbatim text*)
  | Ignore        (*ignored text*)
  | Sync          (*sync marker*)
  | EOF;          (*end-of-file*)

(*a token: source position paired with kind and textual content*)
datatype token =
  Token of Position.T * (token_kind * string);
|
45 |
||
46 |
(*human-readable description of a token kind*)
fun str_of_kind Keyword = "keyword"
  | str_of_kind Ident = "identifier"
  | str_of_kind LongIdent = "long identifier"
  | str_of_kind SymIdent = "symbolic identifier"
  | str_of_kind Var = "schematic variable"
  | str_of_kind TextVar = "text variable"
  | str_of_kind TypeIdent = "type variable"
  | str_of_kind TypeVar = "schematic type variable"
  | str_of_kind Nat = "number"
  | str_of_kind String = "string"
  | str_of_kind Verbatim = "verbatim text"
  | str_of_kind Ignore = "ignored text"
  | str_of_kind Sync = "sync marker"
  | str_of_kind EOF = "end-of-file";
61 |
||
62 |
||
6859 | 63 |
(* sync token *) |
64 |
||
65 |
(*true for every token whose kind is not Sync*)
fun not_sync (Token (_, (kind, _))) = kind <> Sync;
|
67 |
||
68 |
||
5825 | 69 |
(* eof token *) |
70 |
||
71 |
(*the end-of-file token: no position, empty content*)
val eof = Token (Position.none, (EOF, ""));

(*recognize any token of kind EOF, whatever its position or content*)
fun is_eof (Token (_, (kind, _))) = (kind = EOF);

(*stopper pair: the eof token together with its recognizer*)
val stopper = (eof, is_eof);

fun not_eof tok = not (is_eof tok);
|
78 |
||
79 |
||
80 |
(* get position *) |
|
81 |
||
82 |
(*source position stored in a token*)
fun position_of (Token (position, _)) = position;

(*the token position rendered as a string by Position.str_of*)
fun pos_of tok = Position.str_of (position_of tok);
|
84 |
||
85 |
||
86 |
(* kind of token *) |
|
87 |
||
88 |
(*does the token have exactly the expected kind?*)
fun is_kind expected (Token (_, (actual, _))) = (expected = actual);
|
89 |
||
90 |
(*apply pred to the text of a Keyword token; false for all other kinds*)
fun keyword_pred pred tok =
  (case tok of
    Token (_, (Keyword, text)) => pred text
  | _ => false);
|
92 |
||
93 |
(*description of the token's kind, as given by str_of_kind*)
fun name_of tok =
  (case tok of Token (_, (kind, _)) => str_of_kind kind);
|
94 |
||
95 |
(*proper tokens are all those not of kind Ignore*)
fun is_proper (Token (_, (kind, _))) = kind <> Ignore;
|
97 |
||
98 |
||
99 |
(* value of token *) |
|
100 |
||
101 |
(*textual content of a token*)
fun val_of (Token (_, (_, text))) = text;

(*order tokens by their text only (string <=); position and kind are ignored*)
fun token_leq (tok, tok') = val_of tok <= val_of tok';
|
104 |
||
105 |
||
106 |
||
107 |
(** scanners **) |
|
108 |
||
109 |
(*run a scanner under the prompt "# " (via Scan.prompt)*)
val change_prompt = fn scan => Scan.prompt "# " scan;
|
110 |
||
111 |
||
112 |
(* diagnostics *) |
|
113 |
||
114 |
(*compose a lexical error message: fixed prefix, position, then the
  text produced by msg from the remaining input cs*)
fun lex_err msg ((pos, cs), _) =
  let
    val location = Position.str_of pos;
    val details = msg cs;
  in "Outer lexical error" ^ location ^ ": " ^ details end;
|
115 |
||
116 |
||
117 |
(* line numbering *) |
|
118 |
||
119 |
(*run scan and advance the position state by Position.inc*)
fun incr_line scan =
  let fun advance pos = scan >> pair (Position.inc pos)
  in Scan.depend advance end;

(*run scan via Scan.lift, leaving the position state untouched*)
fun keep_line scan = Scan.lift scan;
|
121 |
||
122 |
(*one blank symbol; a newline additionally advances the line position*)
val scan_blank =
  let
    val newline = incr_line ($$ "\n");
    val other_blank = keep_line (Scan.one Symbol.is_blank);
  in newline || other_blank end;
|
125 |
||
126 |
||
127 |
(* scan symbolic idents *) |
|
128 |
||
129 |
(*the characters that may occur in symbolic identifiers*)
val sym_chars =
  explode "!#$%&*+-/:<=>?@^_`|~";

(*membership test against sym_chars*)
fun is_sym_char ch = ch mem sym_chars;
|
131 |
||
5876 | 132 |
(*a non-empty run of symbolic characters, joined into one string*)
fun scan_symid cs = (Scan.any1 is_sym_char >> implode) cs;
5825 | 133 |
|
5876 | 134 |
(*non-empty string consisting of symbolic characters only*)
fun is_symid "" = false
  | is_symid s = forall is_sym_char (Symbol.explode s);

(*symbolic identifier, or identifier according to Syntax.is_identifier*)
fun is_sid s = is_symid s orelse Syntax.is_identifier s;
|
5825 | 136 |
|
137 |
||
138 |
(* scan strings *) |
|
139 |
||
140 |
(*one element of a string body:
    - any blank, read as Symbol.space;
    - a backslash followed by any symbol other than sync/eof, yielding
      that symbol (the backslash is dropped by |--);
    - any other symbol except backslash, double quote, sync and eof*)
val scan_str =
  let
    fun is_regular c = Symbol.not_sync c andalso Symbol.not_eof c;
    fun is_plain c = c <> "\\" andalso c <> "\"" andalso is_regular c;

    val blank = scan_blank >> K Symbol.space;
    val escaped = keep_line ($$ "\\" |-- Scan.one is_regular);
    val plain = keep_line (Scan.one is_plain);
  in blank || escaped || plain end;
|
5825 | 145 |
|
146 |
(*a double-quoted string, returned without its quotes; once the opening
  quote has been read, a missing closing quote is reported via lex_err*)
val scan_string =
  let
    fun unterminated failure =
      lex_err (K "missing quote at end of string") failure;
    val contents = Scan.repeat scan_str >> implode;
  in
    keep_line ($$ "\"") |--
      !! unterminated (change_prompt (contents --| keep_line ($$ "\"")))
  end;
|
150 |
||
151 |
||
152 |
(* scan verbatim text *) |
|
153 |
||
154 |
(*one element of verbatim text:
    - any blank (newlines advance the line position);
    - a star that is not followed by a closing brace;
    - any other symbol except star, sync and eof*)
val scan_verb =
  let
    fun not_close_brace c = c <> "}";
    fun is_plain c =
      c <> "*" andalso Symbol.not_sync c andalso Symbol.not_eof c;

    val lone_star = keep_line ($$ "*" --| Scan.ahead (Scan.one not_close_brace));
    val plain = keep_line (Scan.one is_plain);
  in scan_blank || lone_star || plain end;
5825 | 158 |
|
159 |
(*verbatim text enclosed in {* ... *}, returned without the delimiters;
  once the opening delimiter has been read, a missing end is reported
  via lex_err*)
val scan_verbatim =
  let
    fun unterminated failure =
      lex_err (K "missing end of verbatim text") failure;
    val contents = Scan.repeat scan_verb >> implode;
  in
    keep_line ($$ "{" -- $$ "*") |--
      !! unterminated (change_prompt (contents --| keep_line ($$ "*" -- $$ "}")))
  end;
5825 | 163 |
|
164 |
||
165 |
(* scan space *) |
|
166 |
||
167 |
(*blank symbols other than newline*)
fun is_space c = Symbol.is_blank c andalso c <> "\n";
|
168 |
||
169 |
(*white space up to and including at most one newline: either at least
  one space optionally followed by a newline, or possibly-empty space
  followed by a newline; the newline advances the line position*)
val scan_space =
  let
    val newline = incr_line ($$ "\n");
    val some_spaces = keep_line (Scan.any1 is_space);
    val any_spaces = keep_line (Scan.any is_space);
  in
    some_spaces |-- Scan.optional newline "" ||
    any_spaces |-- newline
  end;
|
172 |
||
173 |
||
174 |
(* scan nested comments *) |
|
175 |
||
176 |
(*one element of a comment body; the scanner state is the nesting depth
  paired with the line position:
    - any blank;
    - an opening comment delimiter, increasing the depth;
    - a closing comment delimiter, decreasing the depth (fails at depth 0,
      leaving the final delimiter to scan_comment);
    - a star that is not followed by a closing parenthesis;
    - any other symbol except star, sync and eof*)
val scan_cmt =
  let
    fun open_nested depth =
      keep_line ($$ "(" ^^ $$ "*") >> pair (depth + 1);
    fun close_nested 0 = Scan.fail
      | close_nested depth = keep_line ($$ "*" ^^ $$ ")") >> pair (depth - 1);

    fun not_close_paren c = c <> ")";
    fun is_plain c =
      c <> "*" andalso Symbol.not_sync c andalso Symbol.not_eof c;

    val lone_star = keep_line ($$ "*" --| Scan.ahead (Scan.one not_close_paren));
    val plain = keep_line (Scan.one is_plain);
  in
    Scan.lift scan_blank ||
    Scan.depend open_nested ||
    Scan.depend close_nested ||
    Scan.lift lone_star ||
    Scan.lift plain
  end;
5825 | 182 |
|
183 |
(*a complete, possibly nested comment; the result is always the empty
  string.  The body is scanned starting at nesting depth 0; once the
  opening delimiter has been read, a missing end is reported via lex_err*)
val scan_comment =
  let
    fun unterminated failure =
      lex_err (K "missing end of comment") failure;
    val interior = Scan.pass 0 (Scan.repeat scan_cmt);
  in
    keep_line ($$ "(" -- $$ "*") |--
      !! unterminated
        (change_prompt (interior |-- keep_line ($$ "*" -- $$ ")") >> K ""))
  end;
|
188 |
||
189 |
||
190 |
(* scan token *) |
|
191 |
||
192 |
(*scan a single token from the symbols cs, starting at position pos;
  lex is the lexicon of keywords.  Input that matches no alternative is
  reported as "bad input" via lex_err*)
fun scan lex (pos, cs) =
  let
    (*every token built here carries the starting position pos*)
    fun mk kind text = Token (pos, (kind, text));
    fun mk_ignore _ = mk Ignore "";
    fun mk_sync _ = mk Sync Symbol.sync;

    (*delimited text*)
    val delimited =
      scan_string >> mk String ||
      scan_verbatim >> mk Verbatim;

    (*white space and comments both yield Ignore tokens*)
    val ignored =
      scan_space >> mk_ignore ||
      scan_comment >> mk_ignore;

    val sync_marker = keep_line (Scan.one Symbol.is_sync >> mk_sync);

    (*keywords from the lexicon versus the various kinds of names*)
    val keyword = Scan.literal lex >> (mk Keyword o implode);
    val name =
      Syntax.scan_longid >> mk LongIdent ||
      Syntax.scan_id >> mk Ident ||
      Syntax.scan_var >> mk Var ||
      $$ "?" ^^ $$ "?" ^^ Syntax.scan_id >> mk TextVar ||
      Syntax.scan_tid >> mk TypeIdent ||
      Syntax.scan_tvar >> mk TypeVar ||
      Syntax.scan_nat >> mk Nat ||
      scan_symid >> mk SymIdent;

    (*Scan.max with token_leq decides between keyword and name;
      presumably the maximal token text wins -- see Scan.max*)
    val word = keep_line (Scan.max token_leq keyword name);

    fun bad_input rest = "bad input " ^ quote (Symbol.beginning rest);
  in
    !! (lex_err bad_input) (delimited || ignored || sync_marker || word) (pos, cs)
  end;
5825 | 215 |
|
216 |
||
217 |
(* source of (proper) tokens *) |
|
218 |
||
6859 | 219 |
(*junk: any symbol that is neither blank, nor sync, nor eof*)
fun is_junk c =
  not (Symbol.is_blank c) andalso Symbol.not_sync c andalso Symbol.not_eof c;

(*recovery scanner: consume a non-empty run of junk symbols*)
fun recover input = keep_line (Scan.any1 is_junk) input;
|
5825 | 221 |
|
222 |
(*turn a source of symbols into a source of proper tokens (Ignore tokens
  are filtered out).  The recover scanner is installed only if
  do_recover is set*)
fun source do_recover get_lex pos src =
  let
    (*get_lex is called anew on every invocation of the token scanner*)
    fun scan_token input = scan (get_lex ()) input;
    val recovery = if do_recover then Some recover else None;
    val all_tokens =
      Source.source' pos Symbol.stopper (Scan.bulk scan_token) recovery src;
  in Source.filter is_proper all_tokens end;
|
226 |
||
227 |
||
228 |
end; |