| author | wenzelm | 
| Sat, 16 Aug 2014 18:08:55 +0200 | |
| changeset 57956 | 3ab5d15fac6b | 
| parent 55106 | 080c0006e917 | 
| child 61476 | 1884c40f1539 | 
| permissions | -rw-r--r-- | 
| 41561 | 1  | 
(* Title: HOL/SPARK/Tools/fdl_lexer.ML  | 
2  | 
Author: Stefan Berghofer  | 
|
3  | 
Copyright: secunet Security Networks AG  | 
|
4  | 
||
5  | 
Lexical analyzer for fdl files.  | 
|
6  | 
*)  | 
|
7  | 
||
8  | 
signature FDL_LEXER =  | 
|
9  | 
sig  | 
|
10  | 
type T  | 
|
11  | 
type chars  | 
|
12  | 
type banner  | 
|
13  | 
type date  | 
|
14  | 
type time  | 
|
15  | 
datatype kind = Keyword | Ident | Long_Ident | Traceability | Number | Comment | EOF  | 
|
16  | 
val tokenize: (chars -> 'a * chars) -> (chars -> T * chars) ->  | 
|
17  | 
Position.T -> string -> 'a * T list  | 
|
18  | 
val position_of: T -> Position.T  | 
|
19  | 
val pos_of: T -> string  | 
|
20  | 
val is_eof: T -> bool  | 
|
21  | 
val stopper: T Scan.stopper  | 
|
22  | 
val kind_of: T -> kind  | 
|
23  | 
val content_of: T -> string  | 
|
24  | 
val unparse: T -> string  | 
|
25  | 
val is_proper: T -> bool  | 
|
26  | 
val is_digit: string -> bool  | 
|
27  | 
val c_comment: chars -> T * chars  | 
|
28  | 
val curly_comment: chars -> T * chars  | 
|
29  | 
val percent_comment: chars -> T * chars  | 
|
30  | 
val vcg_header: chars -> (banner * (date * time) option) * chars  | 
|
31  | 
val siv_header: chars ->  | 
|
32  | 
(banner * (date * time) * (date * time) * (string * string)) * chars  | 
|
33  | 
end;  | 
|
34  | 
||
35  | 
structure Fdl_Lexer: FDL_LEXER =  | 
|
36  | 
struct  | 
|
37  | 
||
38  | 
(** tokens **)  | 
|
39  | 
||
40  | 
datatype kind = Keyword | Ident | Long_Ident | Traceability | Number | Comment | EOF;  | 
|
41  | 
||
42  | 
datatype T = Token of kind * string * Position.T;  | 
|
43  | 
||
44  | 
fun make_token k xs = Token (k, implode (map fst xs),  | 
|
45  | 
case xs of [] => Position.none | (_, p) :: _ => p);  | 
|
46  | 
||
47  | 
fun kind_of (Token (k, _, _)) = k;  | 
|
48  | 
||
49  | 
fun is_proper (Token (Comment, _, _)) = false  | 
|
50  | 
| is_proper _ = true;  | 
|
51  | 
||
52  | 
fun content_of (Token (_, s, _)) = s;  | 
|
53  | 
||
54  | 
fun unparse (Token (Traceability, s, _)) = "For " ^ s ^ ":"  | 
|
55  | 
| unparse (Token (_, s, _)) = s;  | 
|
56  | 
||
57  | 
fun position_of (Token (_, _, pos)) = pos;  | 
|
58  | 
||
| 48992 | 59  | 
val pos_of = Position.here o position_of;  | 
| 41561 | 60  | 
|
61  | 
fun is_eof (Token (EOF, _, _)) = true  | 
|
62  | 
| is_eof _ = false;  | 
|
63  | 
||
64  | 
fun mk_eof pos = Token (EOF, "", pos);  | 
|
65  | 
val eof = mk_eof Position.none;  | 
|
66  | 
||
67  | 
val stopper =  | 
|
68  | 
Scan.stopper (fn [] => eof | toks => mk_eof (position_of (List.last toks))) is_eof;  | 
|
69  | 
||
70  | 
fun leq_token (Token (_, s, _), Token (_, s', _)) = size s <= size s';  | 
|
71  | 
||
72  | 
||
73  | 
(** split up a string into a list of characters (with positions) **)  | 
|
74  | 
||
75  | 
type chars = (string * Position.T) list;  | 
|
76  | 
||
77  | 
fun is_char_eof ("", _) = true
 | 
|
78  | 
| is_char_eof _ = false;  | 
|
79  | 
||
80  | 
val char_stopper = Scan.stopper (K ("", Position.none)) is_char_eof;
 | 
|
81  | 
||
82  | 
fun symbol (x : string, _ : Position.T) = x;  | 
|
83  | 
||
84  | 
fun explode_pos s pos = fst (fold_map (fn x => fn pos =>  | 
|
| 
41584
 
2b07a4212d6d
Replaced ad-hoc advance function by Position.advance
 
berghofe 
parents: 
41561 
diff
changeset
 | 
85  | 
((x, pos), Position.advance x pos)) (raw_explode s) pos);  | 
| 41561 | 86  | 
|
87  | 
||
88  | 
(** scanners **)  | 
|
89  | 
||
90  | 
val any = Scan.one (not o Scan.is_stopper char_stopper);  | 
|
91  | 
||
92  | 
fun prfx [] = Scan.succeed []  | 
|
93  | 
| prfx (x :: xs) = Scan.one (equal x o symbol) ::: prfx xs;  | 
|
94  | 
||
95  | 
val $$$ = prfx o raw_explode;  | 
|
96  | 
||
97  | 
val lexicon = Scan.make_lexicon (map raw_explode  | 
|
98  | 
["rule_family",  | 
|
99  | 
"For",  | 
|
100  | 
":",  | 
|
101  | 
"[",  | 
|
102  | 
"]",  | 
|
103  | 
   "(",
 | 
|
104  | 
")",  | 
|
105  | 
",",  | 
|
106  | 
"&",  | 
|
107  | 
";",  | 
|
108  | 
"=",  | 
|
109  | 
".",  | 
|
110  | 
"..",  | 
|
111  | 
"requires",  | 
|
112  | 
"may_be_replaced_by",  | 
|
113  | 
"may_be_deduced",  | 
|
114  | 
"may_be_deduced_from",  | 
|
115  | 
"are_interchangeable",  | 
|
116  | 
"if",  | 
|
117  | 
"end",  | 
|
118  | 
"function",  | 
|
119  | 
"procedure",  | 
|
120  | 
"type",  | 
|
121  | 
"var",  | 
|
122  | 
"const",  | 
|
123  | 
"array",  | 
|
124  | 
"record",  | 
|
125  | 
":=",  | 
|
126  | 
"of",  | 
|
127  | 
"**",  | 
|
128  | 
"*",  | 
|
129  | 
"/",  | 
|
130  | 
"div",  | 
|
131  | 
"mod",  | 
|
132  | 
"+",  | 
|
133  | 
"-",  | 
|
134  | 
"<>",  | 
|
135  | 
"<",  | 
|
136  | 
">",  | 
|
137  | 
"<=",  | 
|
138  | 
">=",  | 
|
139  | 
"<->",  | 
|
140  | 
"->",  | 
|
141  | 
"not",  | 
|
142  | 
"and",  | 
|
143  | 
"or",  | 
|
144  | 
"for_some",  | 
|
145  | 
"for_all",  | 
|
146  | 
"***",  | 
|
147  | 
"!!!",  | 
|
148  | 
"element",  | 
|
149  | 
"update",  | 
|
150  | 
"pending"]);  | 
|
151  | 
||
152  | 
fun keyword s = Scan.literal lexicon :|--  | 
|
153  | 
(fn xs => if map symbol xs = raw_explode s then Scan.succeed xs else Scan.fail);  | 
|
154  | 
||
155  | 
fun is_digit x = "0" <= x andalso x <= "9";  | 
|
156  | 
fun is_alpha x = "a" <= x andalso x <= "z" orelse "A" <= x andalso x <= "Z";  | 
|
157  | 
val is_underscore = equal "_";  | 
|
158  | 
val is_tilde = equal "~";  | 
|
159  | 
val is_newline = equal "\n";  | 
|
160  | 
val is_tab = equal "\t";  | 
|
161  | 
val is_space = equal " ";  | 
|
162  | 
val is_whitespace = is_space orf is_tab orf is_newline;  | 
|
163  | 
val is_whitespace' = is_space orf is_tab;  | 
|
164  | 
||
165  | 
val number = Scan.many1 (is_digit o symbol);  | 
|
166  | 
||
167  | 
val identifier =  | 
|
168  | 
Scan.one (is_alpha o symbol) :::  | 
|
169  | 
Scan.many  | 
|
170  | 
((is_alpha orf is_digit orf is_underscore) o symbol) @@@  | 
|
171  | 
Scan.optional (Scan.one (is_tilde o symbol) >> single) [];  | 
|
172  | 
||
173  | 
val long_identifier =  | 
|
174  | 
identifier @@@ (Scan.repeat1 ($$$ "." @@@ identifier) >> flat);  | 
|
175  | 
||
176  | 
val whitespace = Scan.many (is_whitespace o symbol);  | 
|
| 
47297
 
de84dd9a9dd4
Require "For" keyword to be followed by at least one whitespace, to avoid that
 
berghofe 
parents: 
47083 
diff
changeset
 | 
177  | 
val whitespace1 = Scan.many1 (is_whitespace o symbol);  | 
| 41561 | 178  | 
val whitespace' = Scan.many (is_whitespace' o symbol);  | 
179  | 
val newline = Scan.one (is_newline o symbol);  | 
|
180  | 
||
181  | 
fun beginning n cs =  | 
|
182  | 
let  | 
|
183  | 
val drop_blanks = #1 o take_suffix is_whitespace;  | 
|
184  | 
val all_cs = drop_blanks cs;  | 
|
185  | 
val dots = if length all_cs > n then " ..." else "";  | 
|
186  | 
in  | 
|
187  | 
(drop_blanks (take n all_cs)  | 
|
188  | 
|> map (fn c => if is_whitespace c then " " else c)  | 
|
189  | 
|> implode) ^ dots  | 
|
190  | 
end;  | 
|
191  | 
||
192  | 
fun !!! text scan =  | 
|
193  | 
let  | 
|
| 
48911
 
5debc3e4fa81
tuned messages: end-of-input rarely means physical end-of-file from the past;
 
wenzelm 
parents: 
47297 
diff
changeset
 | 
194  | 
fun get_pos [] = " (end-of-input)"  | 
| 48992 | 195  | 
| get_pos ((_, pos) :: _) = Position.here pos;  | 
| 41561 | 196  | 
|
| 
43947
 
9b00f09f7721
defer evaluation of Scan.message, for improved performance in the frequent situation where failure is handled later (e.g. via ||);
 
wenzelm 
parents: 
41584 
diff
changeset
 | 
197  | 
fun err (syms, msg) = fn () =>  | 
| 41561 | 198  | 
text ^ get_pos syms ^ " at " ^ beginning 10 (map symbol syms) ^  | 
| 
43947
 
9b00f09f7721
defer evaluation of Scan.message, for improved performance in the frequent situation where failure is handled later (e.g. via ||);
 
wenzelm 
parents: 
41584 
diff
changeset
 | 
199  | 
(case msg of NONE => "" | SOME m => "\n" ^ m ());  | 
| 41561 | 200  | 
in Scan.!! err scan end;  | 
201  | 
||
202  | 
val any_line' =  | 
|
203  | 
Scan.many (not o (Scan.is_stopper char_stopper orf (is_newline o symbol)));  | 
|
204  | 
||
205  | 
val any_line = whitespace' |-- any_line' --|  | 
|
206  | 
newline >> (implode o map symbol);  | 
|
207  | 
||
| 55106 | 208  | 
fun gen_comment a b = $$$ a |-- !!! "unclosed comment"  | 
| 41561 | 209  | 
(Scan.repeat (Scan.unless ($$$ b) any) --| $$$ b) >> make_token Comment;  | 
210  | 
||
211  | 
val c_comment = gen_comment "/*" "*/";  | 
|
212  | 
val curly_comment = gen_comment "{" "}";
 | 
|
213  | 
||
214  | 
val percent_comment = $$$ "%" |-- any_line' >> make_token Comment;  | 
|
215  | 
||
216  | 
fun repeatn 0 _ = Scan.succeed []  | 
|
217  | 
| repeatn n p = Scan.one p ::: repeatn (n-1) p;  | 
|
218  | 
||
219  | 
||
220  | 
(** header of *.vcg and *.siv files (see simplifier/load_provenance.pro) **)  | 
|
221  | 
||
222  | 
type banner = string * string * string;  | 
|
223  | 
type date = string * string * string;  | 
|
224  | 
type time = string * string * string * string option;  | 
|
225  | 
||
226  | 
val asterisks = Scan.repeat1 (Scan.one (equal "*" o symbol));  | 
|
227  | 
||
228  | 
fun alphan n = repeatn n (is_alpha o symbol) >> (implode o map symbol);  | 
|
229  | 
fun digitn n = repeatn n (is_digit o symbol) >> (implode o map symbol);  | 
|
230  | 
||
231  | 
val time =  | 
|
232  | 
digitn 2 --| $$$ ":" -- digitn 2 --| $$$ ":" -- digitn 2 --  | 
|
233  | 
Scan.option ($$$ "." |-- digitn 2) >>  | 
|
234  | 
(fn (((hr, mi), s), ms) => (hr, mi, s, ms));  | 
|
235  | 
||
236  | 
val date =  | 
|
237  | 
digitn 2 --| $$$ "-" -- alphan 3 --| $$$ "-" -- digitn 4 >>  | 
|
238  | 
(fn ((d, m), y) => (d, m, y));  | 
|
239  | 
||
240  | 
val banner =  | 
|
241  | 
whitespace' |-- asterisks --| whitespace' --| newline :|-- (fn xs =>  | 
|
242  | 
(any_line -- any_line -- any_line >>  | 
|
243  | 
(fn ((l1, l2), l3) => (l1, l2, l3))) --|  | 
|
244  | 
whitespace' --| prfx (map symbol xs) --| whitespace' --| newline);  | 
|
245  | 
||
246  | 
val vcg_header = banner -- Scan.option (whitespace |--  | 
|
247  | 
$$$ "DATE :" |-- whitespace |-- date --| whitespace --|  | 
|
248  | 
Scan.option ($$$ "TIME :" -- whitespace) -- time);  | 
|
249  | 
||
250  | 
val siv_header = banner --| whitespace --  | 
|
251  | 
($$$ "CREATED" |-- whitespace |-- (date --| $$$ "," --| whitespace -- time)) --|  | 
|
252  | 
whitespace --  | 
|
253  | 
($$$ "SIMPLIFIED" |-- whitespace |-- (date --| $$$ "," --| whitespace -- time)) --|  | 
|
254  | 
newline --| newline -- (any_line -- any_line) >>  | 
|
255  | 
(fn (((b, c), s), ls) => (b, c, s, ls));  | 
|
256  | 
||
257  | 
||
258  | 
(** the main tokenizer **)  | 
|
259  | 
||
260  | 
fun scan header comment =  | 
|
261  | 
!!! "bad header" header --| whitespace --  | 
|
262  | 
Scan.repeat (Scan.unless (Scan.one is_char_eof)  | 
|
263  | 
(!!! "bad input"  | 
|
264  | 
( comment  | 
|
| 
47297
 
de84dd9a9dd4
Require "For" keyword to be followed by at least one whitespace, to avoid that
 
berghofe 
parents: 
47083 
diff
changeset
 | 
265  | 
|| (keyword "For" -- whitespace1) |--  | 
| 41561 | 266  | 
Scan.repeat1 (Scan.unless (keyword ":") any) --|  | 
267  | 
keyword ":" >> make_token Traceability  | 
|
268  | 
|| Scan.max leq_token  | 
|
269  | 
(Scan.literal lexicon >> make_token Keyword)  | 
|
270  | 
( long_identifier >> make_token Long_Ident  | 
|
271  | 
|| identifier >> make_token Ident)  | 
|
272  | 
|| number >> make_token Number) --|  | 
|
273  | 
whitespace));  | 
|
274  | 
||
275  | 
fun tokenize header comment pos s =  | 
|
276  | 
fst (Scan.finite char_stopper  | 
|
277  | 
(Scan.error (scan header comment)) (explode_pos s pos));  | 
|
278  | 
||
279  | 
end;  |