| author | wenzelm | 
| Thu, 11 Aug 2011 12:30:41 +0200 | |
| changeset 44150 | d0d76f40d7a0 | 
| parent 43947 | 9b00f09f7721 | 
| child 47083 | d04b38d4035b | 
| permissions | -rw-r--r-- | 
| 41561 | 1 | (* Title: HOL/SPARK/Tools/fdl_lexer.ML | 
| 2 | Author: Stefan Berghofer | |
| 3 | Copyright: secunet Security Networks AG | |
| 4 | ||
| 5 | Lexical analyzer for fdl files. | |
| 6 | *) | |
| 7 | ||
| 8 | signature FDL_LEXER = | |
| 9 | sig | |
| 10 | type T | |
| 11 | type chars | |
| 12 | type banner | |
| 13 | type date | |
| 14 | type time | |
| 15 | datatype kind = Keyword | Ident | Long_Ident | Traceability | Number | Comment | EOF | |
| 16 | val tokenize: (chars -> 'a * chars) -> (chars -> T * chars) -> | |
| 17 | Position.T -> string -> 'a * T list | |
| 18 | val position_of: T -> Position.T | |
| 19 | val pos_of: T -> string | |
| 20 | val is_eof: T -> bool | |
| 21 | val stopper: T Scan.stopper | |
| 22 | val kind_of: T -> kind | |
| 23 | val content_of: T -> string | |
| 24 | val unparse: T -> string | |
| 25 | val is_proper: T -> bool | |
| 26 | val is_digit: string -> bool | |
| 27 | val c_comment: chars -> T * chars | |
| 28 | val curly_comment: chars -> T * chars | |
| 29 | val percent_comment: chars -> T * chars | |
| 30 | val vcg_header: chars -> (banner * (date * time) option) * chars | |
| 31 | val siv_header: chars -> | |
| 32 | (banner * (date * time) * (date * time) * (string * string)) * chars | |
| 33 | end; | |
| 34 | ||
| 35 | structure Fdl_Lexer: FDL_LEXER = | |
| 36 | struct | |
| 37 | ||
| 38 | (** tokens **) | |
| 39 | ||
| 40 | datatype kind = Keyword | Ident | Long_Ident | Traceability | Number | Comment | EOF; | |
| 41 | ||
| 42 | datatype T = Token of kind * string * Position.T; | |
| 43 | ||
| 44 | fun make_token k xs = Token (k, implode (map fst xs), | |
| 45 | case xs of [] => Position.none | (_, p) :: _ => p); | |
| 46 | ||
| 47 | fun kind_of (Token (k, _, _)) = k; | |
| 48 | ||
| 49 | fun is_proper (Token (Comment, _, _)) = false | |
| 50 | | is_proper _ = true; | |
| 51 | ||
| 52 | fun content_of (Token (_, s, _)) = s; | |
| 53 | ||
| 54 | fun unparse (Token (Traceability, s, _)) = "For " ^ s ^ ":" | |
| 55 | | unparse (Token (_, s, _)) = s; | |
| 56 | ||
| 57 | fun position_of (Token (_, _, pos)) = pos; | |
| 58 | ||
| 59 | val pos_of = Position.str_of o position_of; | |
| 60 | ||
| 61 | fun is_eof (Token (EOF, _, _)) = true | |
| 62 | | is_eof _ = false; | |
| 63 | ||
| 64 | fun mk_eof pos = Token (EOF, "", pos); | |
| 65 | val eof = mk_eof Position.none; | |
| 66 | ||
| 67 | val stopper = | |
| 68 | Scan.stopper (fn [] => eof | toks => mk_eof (position_of (List.last toks))) is_eof; | |
| 69 | ||
| 70 | fun leq_token (Token (_, s, _), Token (_, s', _)) = size s <= size s'; | |
| 71 | ||
| 72 | ||
| 73 | (** split up a string into a list of characters (with positions) **) | |
| 74 | ||
| 75 | type chars = (string * Position.T) list; | |
| 76 | ||
| 77 | fun is_char_eof ("", _) = true
 | |
| 78 | | is_char_eof _ = false; | |
| 79 | ||
| 80 | val char_stopper = Scan.stopper (K ("", Position.none)) is_char_eof;
 | |
| 81 | ||
| 82 | fun symbol (x : string, _ : Position.T) = x; | |
| 83 | ||
| 84 | fun explode_pos s pos = fst (fold_map (fn x => fn pos => | |
| 41584 
2b07a4212d6d
Replaced ad-hoc advance function by Position.advance
 berghofe parents: 
41561diff
changeset | 85 | ((x, pos), Position.advance x pos)) (raw_explode s) pos); | 
| 41561 | 86 | |
| 87 | ||
| 88 | (** scanners **) | |
| 89 | ||
| 90 | val any = Scan.one (not o Scan.is_stopper char_stopper); | |
| 91 | ||
| 92 | fun prfx [] = Scan.succeed [] | |
| 93 | | prfx (x :: xs) = Scan.one (equal x o symbol) ::: prfx xs; | |
| 94 | ||
| 95 | val $$$ = prfx o raw_explode; | |
| 96 | ||
| 97 | val lexicon = Scan.make_lexicon (map raw_explode | |
| 98 | ["rule_family", | |
| 99 | "title", | |
| 100 | "For", | |
| 101 | ":", | |
| 102 | "[", | |
| 103 | "]", | |
| 104 |    "(",
 | |
| 105 | ")", | |
| 106 | ",", | |
| 107 | "&", | |
| 108 | ";", | |
| 109 | "=", | |
| 110 | ".", | |
| 111 | "..", | |
| 112 | "requires", | |
| 113 | "may_be_replaced_by", | |
| 114 | "may_be_deduced", | |
| 115 | "may_be_deduced_from", | |
| 116 | "are_interchangeable", | |
| 117 | "if", | |
| 118 | "end", | |
| 119 | "function", | |
| 120 | "procedure", | |
| 121 | "type", | |
| 122 | "var", | |
| 123 | "const", | |
| 124 | "array", | |
| 125 | "record", | |
| 126 | ":=", | |
| 127 | "of", | |
| 128 | "**", | |
| 129 | "*", | |
| 130 | "/", | |
| 131 | "div", | |
| 132 | "mod", | |
| 133 | "+", | |
| 134 | "-", | |
| 135 | "<>", | |
| 136 | "<", | |
| 137 | ">", | |
| 138 | "<=", | |
| 139 | ">=", | |
| 140 | "<->", | |
| 141 | "->", | |
| 142 | "not", | |
| 143 | "and", | |
| 144 | "or", | |
| 145 | "for_some", | |
| 146 | "for_all", | |
| 147 | "***", | |
| 148 | "!!!", | |
| 149 | "element", | |
| 150 | "update", | |
| 151 | "pending"]); | |
| 152 | ||
| 153 | fun keyword s = Scan.literal lexicon :|-- | |
| 154 | (fn xs => if map symbol xs = raw_explode s then Scan.succeed xs else Scan.fail); | |
| 155 | ||
| 156 | fun is_digit x = "0" <= x andalso x <= "9"; | |
| 157 | fun is_alpha x = "a" <= x andalso x <= "z" orelse "A" <= x andalso x <= "Z"; | |
| 158 | val is_underscore = equal "_"; | |
| 159 | val is_tilde = equal "~"; | |
| 160 | val is_newline = equal "\n"; | |
| 161 | val is_tab = equal "\t"; | |
| 162 | val is_space = equal " "; | |
| 163 | val is_whitespace = is_space orf is_tab orf is_newline; | |
| 164 | val is_whitespace' = is_space orf is_tab; | |
| 165 | ||
| 166 | val number = Scan.many1 (is_digit o symbol); | |
| 167 | ||
| 168 | val identifier = | |
| 169 | Scan.one (is_alpha o symbol) ::: | |
| 170 | Scan.many | |
| 171 | ((is_alpha orf is_digit orf is_underscore) o symbol) @@@ | |
| 172 | Scan.optional (Scan.one (is_tilde o symbol) >> single) []; | |
| 173 | ||
| 174 | val long_identifier = | |
| 175 | identifier @@@ (Scan.repeat1 ($$$ "." @@@ identifier) >> flat); | |
| 176 | ||
| 177 | val whitespace = Scan.many (is_whitespace o symbol); | |
| 178 | val whitespace' = Scan.many (is_whitespace' o symbol); | |
| 179 | val newline = Scan.one (is_newline o symbol); | |
| 180 | ||
| 181 | fun beginning n cs = | |
| 182 | let | |
| 183 | val drop_blanks = #1 o take_suffix is_whitespace; | |
| 184 | val all_cs = drop_blanks cs; | |
| 185 | val dots = if length all_cs > n then " ..." else ""; | |
| 186 | in | |
| 187 | (drop_blanks (take n all_cs) | |
| 188 | |> map (fn c => if is_whitespace c then " " else c) | |
| 189 | |> implode) ^ dots | |
| 190 | end; | |
| 191 | ||
| 192 | fun !!! text scan = | |
| 193 | let | |
| 194 | fun get_pos [] = " (past end-of-text!)" | |
| 195 | | get_pos ((_, pos) :: _) = Position.str_of pos; | |
| 196 | ||
| 43947 
9b00f09f7721
defer evaluation of Scan.message, for improved performance in the frequent situation where failure is handled later (e.g. via ||);
 wenzelm parents: 
41584diff
changeset | 197 | fun err (syms, msg) = fn () => | 
| 41561 | 198 | text ^ get_pos syms ^ " at " ^ beginning 10 (map symbol syms) ^ | 
| 43947 
9b00f09f7721
defer evaluation of Scan.message, for improved performance in the frequent situation where failure is handled later (e.g. via ||);
 wenzelm parents: 
41584diff
changeset | 199 | (case msg of NONE => "" | SOME m => "\n" ^ m ()); | 
| 41561 | 200 | in Scan.!! err scan end; | 
| 201 | ||
| 202 | val any_line' = | |
| 203 | Scan.many (not o (Scan.is_stopper char_stopper orf (is_newline o symbol))); | |
| 204 | ||
| 205 | val any_line = whitespace' |-- any_line' --| | |
| 206 | newline >> (implode o map symbol); | |
| 207 | ||
| 208 | fun gen_comment a b = $$$ a |-- !!! "missing end of comment" | |
| 209 | (Scan.repeat (Scan.unless ($$$ b) any) --| $$$ b) >> make_token Comment; | |
| 210 | ||
| 211 | val c_comment = gen_comment "/*" "*/"; | |
| 212 | val curly_comment = gen_comment "{" "}";
 | |
| 213 | ||
| 214 | val percent_comment = $$$ "%" |-- any_line' >> make_token Comment; | |
| 215 | ||
| 216 | fun repeatn 0 _ = Scan.succeed [] | |
| 217 | | repeatn n p = Scan.one p ::: repeatn (n-1) p; | |
| 218 | ||
| 219 | ||
| 220 | (** header of *.vcg and *.siv files (see simplifier/load_provenance.pro) **) | |
| 221 | ||
| 222 | type banner = string * string * string; | |
| 223 | type date = string * string * string; | |
| 224 | type time = string * string * string * string option; | |
| 225 | ||
| 226 | val asterisks = Scan.repeat1 (Scan.one (equal "*" o symbol)); | |
| 227 | ||
| 228 | fun alphan n = repeatn n (is_alpha o symbol) >> (implode o map symbol); | |
| 229 | fun digitn n = repeatn n (is_digit o symbol) >> (implode o map symbol); | |
| 230 | ||
| 231 | val time = | |
| 232 | digitn 2 --| $$$ ":" -- digitn 2 --| $$$ ":" -- digitn 2 -- | |
| 233 | Scan.option ($$$ "." |-- digitn 2) >> | |
| 234 | (fn (((hr, mi), s), ms) => (hr, mi, s, ms)); | |
| 235 | ||
| 236 | val date = | |
| 237 | digitn 2 --| $$$ "-" -- alphan 3 --| $$$ "-" -- digitn 4 >> | |
| 238 | (fn ((d, m), y) => (d, m, y)); | |
| 239 | ||
| 240 | val banner = | |
| 241 | whitespace' |-- asterisks --| whitespace' --| newline :|-- (fn xs => | |
| 242 | (any_line -- any_line -- any_line >> | |
| 243 | (fn ((l1, l2), l3) => (l1, l2, l3))) --| | |
| 244 | whitespace' --| prfx (map symbol xs) --| whitespace' --| newline); | |
| 245 | ||
| 246 | val vcg_header = banner -- Scan.option (whitespace |-- | |
| 247 | $$$ "DATE :" |-- whitespace |-- date --| whitespace --| | |
| 248 | Scan.option ($$$ "TIME :" -- whitespace) -- time); | |
| 249 | ||
| 250 | val siv_header = banner --| whitespace -- | |
| 251 | ($$$ "CREATED" |-- whitespace |-- (date --| $$$ "," --| whitespace -- time)) --| | |
| 252 | whitespace -- | |
| 253 | ($$$ "SIMPLIFIED" |-- whitespace |-- (date --| $$$ "," --| whitespace -- time)) --| | |
| 254 | newline --| newline -- (any_line -- any_line) >> | |
| 255 | (fn (((b, c), s), ls) => (b, c, s, ls)); | |
| 256 | ||
| 257 | ||
| 258 | (** the main tokenizer **) | |
| 259 | ||
| 260 | fun scan header comment = | |
| 261 | !!! "bad header" header --| whitespace -- | |
| 262 | Scan.repeat (Scan.unless (Scan.one is_char_eof) | |
| 263 | (!!! "bad input" | |
| 264 | ( comment | |
| 265 | || (keyword "For" -- whitespace) |-- | |
| 266 | Scan.repeat1 (Scan.unless (keyword ":") any) --| | |
| 267 | keyword ":" >> make_token Traceability | |
| 268 | || Scan.max leq_token | |
| 269 | (Scan.literal lexicon >> make_token Keyword) | |
| 270 | ( long_identifier >> make_token Long_Ident | |
| 271 | || identifier >> make_token Ident) | |
| 272 | || number >> make_token Number) --| | |
| 273 | whitespace)); | |
| 274 | ||
| 275 | fun tokenize header comment pos s = | |
| 276 | fst (Scan.finite char_stopper | |
| 277 | (Scan.error (scan header comment)) (explode_pos s pos)); | |
| 278 | ||
| 279 | end; |