author | paulson <lp15@cam.ac.uk> |
Sat, 13 Jun 2015 22:58:38 +0100 | |
changeset 60465 | 23dcee1e91ac |
parent 55106 | 080c0006e917 |
child 61476 | 1884c40f1539 |
permissions | -rw-r--r-- |
41561 | 1 |
(* Title: HOL/SPARK/Tools/fdl_lexer.ML |
2 |
Author: Stefan Berghofer |
|
3 |
Copyright: secunet Security Networks AG |
|
4 |
||
5 |
Lexical analyzer for fdl files. |
|
6 |
*) |
|
7 |
||
8 |
signature FDL_LEXER = |
|
9 |
sig |
|
10 |
type T |
|
11 |
type chars |
|
12 |
type banner |
|
13 |
type date |
|
14 |
type time |
|
15 |
datatype kind = Keyword | Ident | Long_Ident | Traceability | Number | Comment | EOF |
|
16 |
val tokenize: (chars -> 'a * chars) -> (chars -> T * chars) -> |
|
17 |
Position.T -> string -> 'a * T list |
|
18 |
val position_of: T -> Position.T |
|
19 |
val pos_of: T -> string |
|
20 |
val is_eof: T -> bool |
|
21 |
val stopper: T Scan.stopper |
|
22 |
val kind_of: T -> kind |
|
23 |
val content_of: T -> string |
|
24 |
val unparse: T -> string |
|
25 |
val is_proper: T -> bool |
|
26 |
val is_digit: string -> bool |
|
27 |
val c_comment: chars -> T * chars |
|
28 |
val curly_comment: chars -> T * chars |
|
29 |
val percent_comment: chars -> T * chars |
|
30 |
val vcg_header: chars -> (banner * (date * time) option) * chars |
|
31 |
val siv_header: chars -> |
|
32 |
(banner * (date * time) * (date * time) * (string * string)) * chars |
|
33 |
end; |
|
34 |
||
35 |
structure Fdl_Lexer: FDL_LEXER = |
|
36 |
struct |
|
37 |
||
38 |
(** tokens **) |
|
39 |
||
40 |
datatype kind = Keyword | Ident | Long_Ident | Traceability | Number | Comment | EOF; |
|
41 |
||
42 |
datatype T = Token of kind * string * Position.T; |
|
43 |
||
44 |
fun make_token k xs = Token (k, implode (map fst xs), |
|
45 |
case xs of [] => Position.none | (_, p) :: _ => p); |
|
46 |
||
47 |
fun kind_of (Token (k, _, _)) = k; |
|
48 |
||
49 |
fun is_proper (Token (Comment, _, _)) = false |
|
50 |
| is_proper _ = true; |
|
51 |
||
52 |
fun content_of (Token (_, s, _)) = s; |
|
53 |
||
54 |
fun unparse (Token (Traceability, s, _)) = "For " ^ s ^ ":" |
|
55 |
| unparse (Token (_, s, _)) = s; |
|
56 |
||
57 |
fun position_of (Token (_, _, pos)) = pos; |
|
58 |
||
48992 | 59 |
val pos_of = Position.here o position_of; |
41561 | 60 |
|
61 |
fun is_eof (Token (EOF, _, _)) = true |
|
62 |
| is_eof _ = false; |
|
63 |
||
64 |
fun mk_eof pos = Token (EOF, "", pos); |
|
65 |
val eof = mk_eof Position.none; |
|
66 |
||
67 |
val stopper = |
|
68 |
Scan.stopper (fn [] => eof | toks => mk_eof (position_of (List.last toks))) is_eof; |
|
69 |
||
70 |
fun leq_token (Token (_, s, _), Token (_, s', _)) = size s <= size s'; |
|
71 |
||
72 |
||
73 |
(** split up a string into a list of characters (with positions) **) |
|
74 |
||
75 |
type chars = (string * Position.T) list; |
|
76 |
||
77 |
fun is_char_eof ("", _) = true |
|
78 |
| is_char_eof _ = false; |
|
79 |
||
80 |
val char_stopper = Scan.stopper (K ("", Position.none)) is_char_eof; |
|
81 |
||
82 |
fun symbol (x : string, _ : Position.T) = x; |
|
83 |
||
84 |
fun explode_pos s pos = fst (fold_map (fn x => fn pos => |
|
41584
2b07a4212d6d
Replaced ad-hoc advance function by Position.advance
berghofe
parents:
41561
diff
changeset
|
85 |
((x, pos), Position.advance x pos)) (raw_explode s) pos); |
41561 | 86 |
|
87 |
||
88 |
(** scanners **) |
|
89 |
||
90 |
val any = Scan.one (not o Scan.is_stopper char_stopper); |
|
91 |
||
92 |
fun prfx [] = Scan.succeed [] |
|
93 |
| prfx (x :: xs) = Scan.one (equal x o symbol) ::: prfx xs; |
|
94 |
||
95 |
val $$$ = prfx o raw_explode; |
|
96 |
||
97 |
val lexicon = Scan.make_lexicon (map raw_explode |
|
98 |
["rule_family", |
|
99 |
"For", |
|
100 |
":", |
|
101 |
"[", |
|
102 |
"]", |
|
103 |
"(", |
|
104 |
")", |
|
105 |
",", |
|
106 |
"&", |
|
107 |
";", |
|
108 |
"=", |
|
109 |
".", |
|
110 |
"..", |
|
111 |
"requires", |
|
112 |
"may_be_replaced_by", |
|
113 |
"may_be_deduced", |
|
114 |
"may_be_deduced_from", |
|
115 |
"are_interchangeable", |
|
116 |
"if", |
|
117 |
"end", |
|
118 |
"function", |
|
119 |
"procedure", |
|
120 |
"type", |
|
121 |
"var", |
|
122 |
"const", |
|
123 |
"array", |
|
124 |
"record", |
|
125 |
":=", |
|
126 |
"of", |
|
127 |
"**", |
|
128 |
"*", |
|
129 |
"/", |
|
130 |
"div", |
|
131 |
"mod", |
|
132 |
"+", |
|
133 |
"-", |
|
134 |
"<>", |
|
135 |
"<", |
|
136 |
">", |
|
137 |
"<=", |
|
138 |
">=", |
|
139 |
"<->", |
|
140 |
"->", |
|
141 |
"not", |
|
142 |
"and", |
|
143 |
"or", |
|
144 |
"for_some", |
|
145 |
"for_all", |
|
146 |
"***", |
|
147 |
"!!!", |
|
148 |
"element", |
|
149 |
"update", |
|
150 |
"pending"]); |
|
151 |
||
152 |
fun keyword s = Scan.literal lexicon :|-- |
|
153 |
(fn xs => if map symbol xs = raw_explode s then Scan.succeed xs else Scan.fail); |
|
154 |
||
155 |
fun is_digit x = "0" <= x andalso x <= "9"; |
|
156 |
fun is_alpha x = "a" <= x andalso x <= "z" orelse "A" <= x andalso x <= "Z"; |
|
157 |
val is_underscore = equal "_"; |
|
158 |
val is_tilde = equal "~"; |
|
159 |
val is_newline = equal "\n"; |
|
160 |
val is_tab = equal "\t"; |
|
161 |
val is_space = equal " "; |
|
162 |
val is_whitespace = is_space orf is_tab orf is_newline; |
|
163 |
val is_whitespace' = is_space orf is_tab; |
|
164 |
||
165 |
val number = Scan.many1 (is_digit o symbol); |
|
166 |
||
167 |
val identifier = |
|
168 |
Scan.one (is_alpha o symbol) ::: |
|
169 |
Scan.many |
|
170 |
((is_alpha orf is_digit orf is_underscore) o symbol) @@@ |
|
171 |
Scan.optional (Scan.one (is_tilde o symbol) >> single) []; |
|
172 |
||
173 |
val long_identifier = |
|
174 |
identifier @@@ (Scan.repeat1 ($$$ "." @@@ identifier) >> flat); |
|
175 |
||
176 |
val whitespace = Scan.many (is_whitespace o symbol); |
|
47297
de84dd9a9dd4
Require "For" keyword to be followed by at least one whitespace, to avoid that
berghofe
parents:
47083
diff
changeset
|
177 |
val whitespace1 = Scan.many1 (is_whitespace o symbol); |
41561 | 178 |
val whitespace' = Scan.many (is_whitespace' o symbol); |
179 |
val newline = Scan.one (is_newline o symbol); |
|
180 |
||
181 |
fun beginning n cs = |
|
182 |
let |
|
183 |
val drop_blanks = #1 o take_suffix is_whitespace; |
|
184 |
val all_cs = drop_blanks cs; |
|
185 |
val dots = if length all_cs > n then " ..." else ""; |
|
186 |
in |
|
187 |
(drop_blanks (take n all_cs) |
|
188 |
|> map (fn c => if is_whitespace c then " " else c) |
|
189 |
|> implode) ^ dots |
|
190 |
end; |
|
191 |
||
192 |
fun !!! text scan = |
|
193 |
let |
|
48911
5debc3e4fa81
tuned messages: end-of-input rarely means physical end-of-file from the past;
wenzelm
parents:
47297
diff
changeset
|
194 |
fun get_pos [] = " (end-of-input)" |
48992 | 195 |
| get_pos ((_, pos) :: _) = Position.here pos; |
41561 | 196 |
|
43947
9b00f09f7721
defer evaluation of Scan.message, for improved performance in the frequent situation where failure is handled later (e.g. via ||);
wenzelm
parents:
41584
diff
changeset
|
197 |
fun err (syms, msg) = fn () => |
41561 | 198 |
text ^ get_pos syms ^ " at " ^ beginning 10 (map symbol syms) ^ |
43947
9b00f09f7721
defer evaluation of Scan.message, for improved performance in the frequent situation where failure is handled later (e.g. via ||);
wenzelm
parents:
41584
diff
changeset
|
199 |
(case msg of NONE => "" | SOME m => "\n" ^ m ()); |
41561 | 200 |
in Scan.!! err scan end; |
201 |
||
202 |
val any_line' = |
|
203 |
Scan.many (not o (Scan.is_stopper char_stopper orf (is_newline o symbol))); |
|
204 |
||
205 |
val any_line = whitespace' |-- any_line' --| |
|
206 |
newline >> (implode o map symbol); |
|
207 |
||
55106 | 208 |
fun gen_comment a b = $$$ a |-- !!! "unclosed comment" |
41561 | 209 |
(Scan.repeat (Scan.unless ($$$ b) any) --| $$$ b) >> make_token Comment; |
210 |
||
211 |
val c_comment = gen_comment "/*" "*/"; |
|
212 |
val curly_comment = gen_comment "{" "}"; |
|
213 |
||
214 |
val percent_comment = $$$ "%" |-- any_line' >> make_token Comment; |
|
215 |
||
216 |
fun repeatn 0 _ = Scan.succeed [] |
|
217 |
| repeatn n p = Scan.one p ::: repeatn (n-1) p; |
|
218 |
||
219 |
||
220 |
(** header of *.vcg and *.siv files (see simplifier/load_provenance.pro) **) |
|
221 |
||
222 |
type banner = string * string * string; |
|
223 |
type date = string * string * string; |
|
224 |
type time = string * string * string * string option; |
|
225 |
||
226 |
val asterisks = Scan.repeat1 (Scan.one (equal "*" o symbol)); |
|
227 |
||
228 |
fun alphan n = repeatn n (is_alpha o symbol) >> (implode o map symbol); |
|
229 |
fun digitn n = repeatn n (is_digit o symbol) >> (implode o map symbol); |
|
230 |
||
231 |
val time = |
|
232 |
digitn 2 --| $$$ ":" -- digitn 2 --| $$$ ":" -- digitn 2 -- |
|
233 |
Scan.option ($$$ "." |-- digitn 2) >> |
|
234 |
(fn (((hr, mi), s), ms) => (hr, mi, s, ms)); |
|
235 |
||
236 |
val date = |
|
237 |
digitn 2 --| $$$ "-" -- alphan 3 --| $$$ "-" -- digitn 4 >> |
|
238 |
(fn ((d, m), y) => (d, m, y)); |
|
239 |
||
240 |
val banner = |
|
241 |
whitespace' |-- asterisks --| whitespace' --| newline :|-- (fn xs => |
|
242 |
(any_line -- any_line -- any_line >> |
|
243 |
(fn ((l1, l2), l3) => (l1, l2, l3))) --| |
|
244 |
whitespace' --| prfx (map symbol xs) --| whitespace' --| newline); |
|
245 |
||
246 |
val vcg_header = banner -- Scan.option (whitespace |-- |
|
247 |
$$$ "DATE :" |-- whitespace |-- date --| whitespace --| |
|
248 |
Scan.option ($$$ "TIME :" -- whitespace) -- time); |
|
249 |
||
250 |
val siv_header = banner --| whitespace -- |
|
251 |
($$$ "CREATED" |-- whitespace |-- (date --| $$$ "," --| whitespace -- time)) --| |
|
252 |
whitespace -- |
|
253 |
($$$ "SIMPLIFIED" |-- whitespace |-- (date --| $$$ "," --| whitespace -- time)) --| |
|
254 |
newline --| newline -- (any_line -- any_line) >> |
|
255 |
(fn (((b, c), s), ls) => (b, c, s, ls)); |
|
256 |
||
257 |
||
258 |
(** the main tokenizer **) |
|
259 |
||
260 |
fun scan header comment = |
|
261 |
!!! "bad header" header --| whitespace -- |
|
262 |
Scan.repeat (Scan.unless (Scan.one is_char_eof) |
|
263 |
(!!! "bad input" |
|
264 |
( comment |
|
47297
de84dd9a9dd4
Require "For" keyword to be followed by at least one whitespace, to avoid that
berghofe
parents:
47083
diff
changeset
|
265 |
|| (keyword "For" -- whitespace1) |-- |
41561 | 266 |
Scan.repeat1 (Scan.unless (keyword ":") any) --| |
267 |
keyword ":" >> make_token Traceability |
|
268 |
|| Scan.max leq_token |
|
269 |
(Scan.literal lexicon >> make_token Keyword) |
|
270 |
( long_identifier >> make_token Long_Ident |
|
271 |
|| identifier >> make_token Ident) |
|
272 |
|| number >> make_token Number) --| |
|
273 |
whitespace)); |
|
274 |
||
275 |
fun tokenize header comment pos s = |
|
276 |
fst (Scan.finite char_stopper |
|
277 |
(Scan.error (scan header comment)) (explode_pos s pos)); |
|
278 |
||
279 |
end; |