41561
|
1 |
(* Title: HOL/SPARK/Tools/fdl_lexer.ML
|
|
2 |
Author: Stefan Berghofer
|
|
3 |
Copyright: secunet Security Networks AG
|
|
4 |
|
|
5 |
Lexical analyzer for fdl files.
|
|
6 |
*)
|
|
7 |
|
|
8 |
signature FDL_LEXER =
|
|
9 |
sig
|
|
10 |
type T
|
|
11 |
type chars
|
|
12 |
type banner
|
|
13 |
type date
|
|
14 |
type time
|
|
15 |
datatype kind = Keyword | Ident | Long_Ident | Traceability | Number | Comment | EOF
|
|
16 |
val tokenize: (chars -> 'a * chars) -> (chars -> T * chars) ->
|
|
17 |
Position.T -> string -> 'a * T list
|
|
18 |
val position_of: T -> Position.T
|
|
19 |
val pos_of: T -> string
|
|
20 |
val is_eof: T -> bool
|
|
21 |
val stopper: T Scan.stopper
|
|
22 |
val kind_of: T -> kind
|
|
23 |
val content_of: T -> string
|
|
24 |
val unparse: T -> string
|
|
25 |
val is_proper: T -> bool
|
|
26 |
val is_digit: string -> bool
|
|
27 |
val c_comment: chars -> T * chars
|
|
28 |
val curly_comment: chars -> T * chars
|
|
29 |
val percent_comment: chars -> T * chars
|
|
30 |
val vcg_header: chars -> (banner * (date * time) option) * chars
|
|
31 |
val siv_header: chars ->
|
|
32 |
(banner * (date * time) * (date * time) * (string * string)) * chars
|
|
33 |
end;
|
|
34 |
|
|
35 |
structure Fdl_Lexer: FDL_LEXER =
|
|
36 |
struct
|
|
37 |
|
|
38 |
(** tokens **)
|
|
39 |
|
|
40 |
datatype kind = Keyword | Ident | Long_Ident | Traceability | Number | Comment | EOF;
|
|
41 |
|
|
42 |
datatype T = Token of kind * string * Position.T;
|
|
43 |
|
|
44 |
fun make_token k xs = Token (k, implode (map fst xs),
|
|
45 |
case xs of [] => Position.none | (_, p) :: _ => p);
|
|
46 |
|
|
47 |
fun kind_of (Token (k, _, _)) = k;
|
|
48 |
|
|
49 |
fun is_proper (Token (Comment, _, _)) = false
|
|
50 |
| is_proper _ = true;
|
|
51 |
|
|
52 |
fun content_of (Token (_, s, _)) = s;
|
|
53 |
|
|
54 |
fun unparse (Token (Traceability, s, _)) = "For " ^ s ^ ":"
|
|
55 |
| unparse (Token (_, s, _)) = s;
|
|
56 |
|
|
57 |
fun position_of (Token (_, _, pos)) = pos;
|
|
58 |
|
|
59 |
val pos_of = Position.str_of o position_of;
|
|
60 |
|
|
61 |
fun is_eof (Token (EOF, _, _)) = true
|
|
62 |
| is_eof _ = false;
|
|
63 |
|
|
64 |
fun mk_eof pos = Token (EOF, "", pos);
|
|
65 |
val eof = mk_eof Position.none;
|
|
66 |
|
|
67 |
val stopper =
|
|
68 |
Scan.stopper (fn [] => eof | toks => mk_eof (position_of (List.last toks))) is_eof;
|
|
69 |
|
|
70 |
fun leq_token (Token (_, s, _), Token (_, s', _)) = size s <= size s';
|
|
71 |
|
|
72 |
|
|
73 |
(** split up a string into a list of characters (with positions) **)
|
|
74 |
|
|
75 |
type chars = (string * Position.T) list;
|
|
76 |
|
|
77 |
(* a variant of Position.advance (see Pure/General/position.ML) *)
|
|
78 |
fun advance x pos =
|
|
79 |
let
|
|
80 |
fun incr i = i+1;
|
|
81 |
fun change_prop s f props = case Properties.get_int props s of
|
|
82 |
NONE => props
|
|
83 |
| SOME i => Properties.put (s, Int.toString (f i)) props;
|
|
84 |
in
|
|
85 |
pos |>
|
|
86 |
Position.properties_of |>
|
|
87 |
change_prop "offset" incr |>
|
|
88 |
(case x of
|
|
89 |
"\n" =>
|
|
90 |
change_prop "line" incr #>
|
|
91 |
change_prop "column" (K 1)
|
|
92 |
| _ => change_prop "column" incr) |>
|
|
93 |
Position.of_properties
|
|
94 |
end;
|
|
95 |
|
|
96 |
fun is_char_eof ("", _) = true
|
|
97 |
| is_char_eof _ = false;
|
|
98 |
|
|
99 |
val char_stopper = Scan.stopper (K ("", Position.none)) is_char_eof;
|
|
100 |
|
|
101 |
fun symbol (x : string, _ : Position.T) = x;
|
|
102 |
|
|
103 |
fun explode_pos s pos = fst (fold_map (fn x => fn pos =>
|
|
104 |
((x, pos), advance x pos)) (raw_explode s) pos);
|
|
105 |
|
|
106 |
|
|
107 |
(** scanners **)
|
|
108 |
|
|
109 |
val any = Scan.one (not o Scan.is_stopper char_stopper);
|
|
110 |
|
|
111 |
fun prfx [] = Scan.succeed []
|
|
112 |
| prfx (x :: xs) = Scan.one (equal x o symbol) ::: prfx xs;
|
|
113 |
|
|
114 |
val $$$ = prfx o raw_explode;
|
|
115 |
|
|
116 |
val lexicon = Scan.make_lexicon (map raw_explode
|
|
117 |
["rule_family",
|
|
118 |
"title",
|
|
119 |
"For",
|
|
120 |
":",
|
|
121 |
"[",
|
|
122 |
"]",
|
|
123 |
"(",
|
|
124 |
")",
|
|
125 |
",",
|
|
126 |
"&",
|
|
127 |
";",
|
|
128 |
"=",
|
|
129 |
".",
|
|
130 |
"..",
|
|
131 |
"requires",
|
|
132 |
"may_be_replaced_by",
|
|
133 |
"may_be_deduced",
|
|
134 |
"may_be_deduced_from",
|
|
135 |
"are_interchangeable",
|
|
136 |
"if",
|
|
137 |
"end",
|
|
138 |
"function",
|
|
139 |
"procedure",
|
|
140 |
"type",
|
|
141 |
"var",
|
|
142 |
"const",
|
|
143 |
"array",
|
|
144 |
"record",
|
|
145 |
":=",
|
|
146 |
"of",
|
|
147 |
"**",
|
|
148 |
"*",
|
|
149 |
"/",
|
|
150 |
"div",
|
|
151 |
"mod",
|
|
152 |
"+",
|
|
153 |
"-",
|
|
154 |
"<>",
|
|
155 |
"<",
|
|
156 |
">",
|
|
157 |
"<=",
|
|
158 |
">=",
|
|
159 |
"<->",
|
|
160 |
"->",
|
|
161 |
"not",
|
|
162 |
"and",
|
|
163 |
"or",
|
|
164 |
"for_some",
|
|
165 |
"for_all",
|
|
166 |
"***",
|
|
167 |
"!!!",
|
|
168 |
"element",
|
|
169 |
"update",
|
|
170 |
"pending"]);
|
|
171 |
|
|
172 |
fun keyword s = Scan.literal lexicon :|--
|
|
173 |
(fn xs => if map symbol xs = raw_explode s then Scan.succeed xs else Scan.fail);
|
|
174 |
|
|
175 |
fun is_digit x = "0" <= x andalso x <= "9";
|
|
176 |
fun is_alpha x = "a" <= x andalso x <= "z" orelse "A" <= x andalso x <= "Z";
|
|
177 |
val is_underscore = equal "_";
|
|
178 |
val is_tilde = equal "~";
|
|
179 |
val is_newline = equal "\n";
|
|
180 |
val is_tab = equal "\t";
|
|
181 |
val is_space = equal " ";
|
|
182 |
val is_whitespace = is_space orf is_tab orf is_newline;
|
|
183 |
val is_whitespace' = is_space orf is_tab;
|
|
184 |
|
|
185 |
val number = Scan.many1 (is_digit o symbol);
|
|
186 |
|
|
187 |
val identifier =
|
|
188 |
Scan.one (is_alpha o symbol) :::
|
|
189 |
Scan.many
|
|
190 |
((is_alpha orf is_digit orf is_underscore) o symbol) @@@
|
|
191 |
Scan.optional (Scan.one (is_tilde o symbol) >> single) [];
|
|
192 |
|
|
193 |
val long_identifier =
|
|
194 |
identifier @@@ (Scan.repeat1 ($$$ "." @@@ identifier) >> flat);
|
|
195 |
|
|
196 |
val whitespace = Scan.many (is_whitespace o symbol);
|
|
197 |
val whitespace' = Scan.many (is_whitespace' o symbol);
|
|
198 |
val newline = Scan.one (is_newline o symbol);
|
|
199 |
|
|
200 |
fun beginning n cs =
|
|
201 |
let
|
|
202 |
val drop_blanks = #1 o take_suffix is_whitespace;
|
|
203 |
val all_cs = drop_blanks cs;
|
|
204 |
val dots = if length all_cs > n then " ..." else "";
|
|
205 |
in
|
|
206 |
(drop_blanks (take n all_cs)
|
|
207 |
|> map (fn c => if is_whitespace c then " " else c)
|
|
208 |
|> implode) ^ dots
|
|
209 |
end;
|
|
210 |
|
|
211 |
fun !!! text scan =
|
|
212 |
let
|
|
213 |
fun get_pos [] = " (past end-of-text!)"
|
|
214 |
| get_pos ((_, pos) :: _) = Position.str_of pos;
|
|
215 |
|
|
216 |
fun err (syms, msg) =
|
|
217 |
text ^ get_pos syms ^ " at " ^ beginning 10 (map symbol syms) ^
|
|
218 |
(case msg of NONE => "" | SOME s => "\n" ^ s);
|
|
219 |
in Scan.!! err scan end;
|
|
220 |
|
|
221 |
val any_line' =
|
|
222 |
Scan.many (not o (Scan.is_stopper char_stopper orf (is_newline o symbol)));
|
|
223 |
|
|
224 |
val any_line = whitespace' |-- any_line' --|
|
|
225 |
newline >> (implode o map symbol);
|
|
226 |
|
|
227 |
fun gen_comment a b = $$$ a |-- !!! "missing end of comment"
|
|
228 |
(Scan.repeat (Scan.unless ($$$ b) any) --| $$$ b) >> make_token Comment;
|
|
229 |
|
|
230 |
val c_comment = gen_comment "/*" "*/";
|
|
231 |
val curly_comment = gen_comment "{" "}";
|
|
232 |
|
|
233 |
val percent_comment = $$$ "%" |-- any_line' >> make_token Comment;
|
|
234 |
|
|
235 |
fun repeatn 0 _ = Scan.succeed []
|
|
236 |
| repeatn n p = Scan.one p ::: repeatn (n-1) p;
|
|
237 |
|
|
238 |
|
|
239 |
(** header of *.vcg and *.siv files (see simplifier/load_provenance.pro) **)
|
|
240 |
|
|
241 |
type banner = string * string * string;
|
|
242 |
type date = string * string * string;
|
|
243 |
type time = string * string * string * string option;
|
|
244 |
|
|
245 |
val asterisks = Scan.repeat1 (Scan.one (equal "*" o symbol));
|
|
246 |
|
|
247 |
fun alphan n = repeatn n (is_alpha o symbol) >> (implode o map symbol);
|
|
248 |
fun digitn n = repeatn n (is_digit o symbol) >> (implode o map symbol);
|
|
249 |
|
|
250 |
val time =
|
|
251 |
digitn 2 --| $$$ ":" -- digitn 2 --| $$$ ":" -- digitn 2 --
|
|
252 |
Scan.option ($$$ "." |-- digitn 2) >>
|
|
253 |
(fn (((hr, mi), s), ms) => (hr, mi, s, ms));
|
|
254 |
|
|
255 |
val date =
|
|
256 |
digitn 2 --| $$$ "-" -- alphan 3 --| $$$ "-" -- digitn 4 >>
|
|
257 |
(fn ((d, m), y) => (d, m, y));
|
|
258 |
|
|
259 |
val banner =
|
|
260 |
whitespace' |-- asterisks --| whitespace' --| newline :|-- (fn xs =>
|
|
261 |
(any_line -- any_line -- any_line >>
|
|
262 |
(fn ((l1, l2), l3) => (l1, l2, l3))) --|
|
|
263 |
whitespace' --| prfx (map symbol xs) --| whitespace' --| newline);
|
|
264 |
|
|
265 |
val vcg_header = banner -- Scan.option (whitespace |--
|
|
266 |
$$$ "DATE :" |-- whitespace |-- date --| whitespace --|
|
|
267 |
Scan.option ($$$ "TIME :" -- whitespace) -- time);
|
|
268 |
|
|
269 |
val siv_header = banner --| whitespace --
|
|
270 |
($$$ "CREATED" |-- whitespace |-- (date --| $$$ "," --| whitespace -- time)) --|
|
|
271 |
whitespace --
|
|
272 |
($$$ "SIMPLIFIED" |-- whitespace |-- (date --| $$$ "," --| whitespace -- time)) --|
|
|
273 |
newline --| newline -- (any_line -- any_line) >>
|
|
274 |
(fn (((b, c), s), ls) => (b, c, s, ls));
|
|
275 |
|
|
276 |
|
|
277 |
(** the main tokenizer **)
|
|
278 |
|
|
279 |
fun scan header comment =
|
|
280 |
!!! "bad header" header --| whitespace --
|
|
281 |
Scan.repeat (Scan.unless (Scan.one is_char_eof)
|
|
282 |
(!!! "bad input"
|
|
283 |
( comment
|
|
284 |
|| (keyword "For" -- whitespace) |--
|
|
285 |
Scan.repeat1 (Scan.unless (keyword ":") any) --|
|
|
286 |
keyword ":" >> make_token Traceability
|
|
287 |
|| Scan.max leq_token
|
|
288 |
(Scan.literal lexicon >> make_token Keyword)
|
|
289 |
( long_identifier >> make_token Long_Ident
|
|
290 |
|| identifier >> make_token Ident)
|
|
291 |
|| number >> make_token Number) --|
|
|
292 |
whitespace));
|
|
293 |
|
|
294 |
fun tokenize header comment pos s =
|
|
295 |
fst (Scan.finite char_stopper
|
|
296 |
(Scan.error (scan header comment)) (explode_pos s pos));
|
|
297 |
|
|
298 |
end;
|