src/HOL/SPARK/Tools/fdl_lexer.ML
author wenzelm
Thu Aug 23 17:46:03 2012 +0200 (2012-08-23)
changeset 48911 5debc3e4fa81
parent 47297 de84dd9a9dd4
child 48992 0518bf89c777
permissions -rw-r--r--
tuned messages: end-of-input rarely means physical end-of-file from the past;
berghofe@41561
     1
(*  Title:      HOL/SPARK/Tools/fdl_lexer.ML
berghofe@41561
     2
    Author:     Stefan Berghofer
berghofe@41561
     3
    Copyright:  secunet Security Networks AG
berghofe@41561
     4
berghofe@41561
     5
Lexical analyzer for fdl files.
berghofe@41561
     6
*)
berghofe@41561
     7
berghofe@41561
     8
signature FDL_LEXER =
sig
  (* abstract types: tokens, positioned character lists, header components *)
  type T
  type chars
  type banner
  type date
  type time

  (* classification of tokens *)
  datatype kind = Keyword | Ident | Long_Ident | Traceability | Number | Comment | EOF

  (* tokenize header comment pos s: scan s (starting at pos) with the given
     header scanner and comment scanner; returns the header result and tokens *)
  val tokenize: (chars -> 'a * chars) -> (chars -> T * chars) ->
    Position.T -> string -> 'a * T list

  (* token access *)
  val position_of: T -> Position.T
  val pos_of: T -> string
  val is_eof: T -> bool
  val stopper: T Scan.stopper
  val kind_of: T -> kind
  val content_of: T -> string
  val unparse: T -> string
  val is_proper: T -> bool

  (* character classification *)
  val is_digit: string -> bool

  (* comment scanners, suitable as the comment argument of tokenize *)
  val c_comment: chars -> T * chars
  val curly_comment: chars -> T * chars
  val percent_comment: chars -> T * chars

  (* header scanners, suitable as the header argument of tokenize *)
  val vcg_header: chars -> (banner * (date * time) option) * chars
  val siv_header: chars ->
    (banner * (date * time) * (date * time) * (string * string)) * chars
end;
berghofe@41561
    34
berghofe@41561
    35
structure Fdl_Lexer: FDL_LEXER =
berghofe@41561
    36
struct
berghofe@41561
    37
berghofe@41561
    38
(** tokens **)
berghofe@41561
    39
berghofe@41561
    40
(* Token classification.  Comment tokens are the ones rejected by is_proper;
   EOF marks the stopper token produced by mk_eof. *)
datatype kind = Keyword | Ident | Long_Ident | Traceability | Number | Comment | EOF;
berghofe@41561
    41
berghofe@41561
    42
(* A token: its kind, its text, and the position of its first character
   (see make_token). *)
datatype T = Token of kind * string * Position.T;
berghofe@41561
    43
berghofe@41561
    44
(* Build a token of kind k from a list of positioned characters.  The text is
   the concatenation of the characters; the position is that of the first
   character, or Position.none when the list is empty. *)
fun make_token k cs =
  let
    val text = implode (map fst cs);
    val pos =
      (case cs of
        [] => Position.none
      | (_, first_pos) :: _ => first_pos);
  in Token (k, text, pos) end;
berghofe@41561
    46
berghofe@41561
    47
(* The kind component of a token. *)
fun kind_of (Token tok) = #1 tok;
berghofe@41561
    48
berghofe@41561
    49
(* A token is proper unless it is a comment. *)
fun is_proper tok =
  (case kind_of tok of
    Comment => false
  | _ => true);
berghofe@41561
    51
berghofe@41561
    52
(* The text component of a token. *)
fun content_of (Token tok) = #2 tok;
berghofe@41561
    53
berghofe@41561
    54
(* Render a token as source text.  Traceability tokens are wrapped in
   "For " ... ":"; all other tokens are rendered as their plain text. *)
fun unparse (Token (k, text, _)) =
  (case k of
    Traceability => "For " ^ text ^ ":"
  | _ => text);
berghofe@41561
    56
berghofe@41561
    57
(* The position component of a token. *)
fun position_of (Token tok) = #3 tok;
berghofe@41561
    58
berghofe@41561
    59
(* The position of a token, formatted as a string by Position.str_of. *)
fun pos_of tok = Position.str_of (position_of tok);
berghofe@41561
    60
berghofe@41561
    61
(* Recognize the end-of-input token. *)
fun is_eof tok =
  (case kind_of tok of
    EOF => true
  | _ => false);
berghofe@41561
    63
berghofe@41561
    64
(* End-of-input token with empty text at the given position. *)
fun mk_eof p = Token (EOF, "", p);

(* End-of-input token without position information. *)
val eof = Token (EOF, "", Position.none);
berghofe@41561
    66
berghofe@41561
    67
(* Stopper for token lists: the EOF token takes the position of the last
   token, or carries no position when the list is empty. *)
val stopper =
  let
    fun eof_after [] = eof
      | eof_after toks = mk_eof (position_of (List.last toks));
  in Scan.stopper eof_after is_eof end;
berghofe@41561
    69
berghofe@41561
    70
(* Order tokens by the length of their text (used with Scan.max to prefer
   the longer of two alternative matches). *)
fun leq_token (tok1, tok2) = size (content_of tok1) <= size (content_of tok2);
berghofe@41561
    71
berghofe@41561
    72
berghofe@41561
    73
(** split up a string into a list of characters (with positions) **)
berghofe@41561
    74
berghofe@41561
    75
(* Input of the character-level scanners: each element pairs a character
   (as a string) with its position, as produced by explode_pos. *)
type chars = (string * Position.T) list;
berghofe@41561
    76
berghofe@41561
    77
(* The end-of-input marker is the character entry with empty text. *)
fun is_char_eof (s, _) = (s = "");
berghofe@41561
    79
berghofe@41561
    80
(* Stopper for character lists: an empty string without position. *)
val char_stopper = Scan.stopper (fn _ => ("", Position.none)) is_char_eof;
berghofe@41561
    81
berghofe@41561
    82
(* The character text of a positioned character. *)
fun symbol ((s, _) : string * Position.T) = s;
berghofe@41561
    83
berghofe@41561
    84
(* Split s into single characters via raw_explode and pair each one with its
   position, starting at pos and advancing with Position.advance. *)
fun explode_pos s pos =
  let
    fun attach c p = ((c, p), Position.advance c p);
    val (positioned, _) = fold_map attach (raw_explode s) pos;
  in positioned end;
berghofe@41561
    86
berghofe@41561
    87
berghofe@41561
    88
(** scanners **)
berghofe@41561
    89
berghofe@41561
    90
(* Accept any single character except the end-of-input marker. *)
val any = Scan.one (fn c => not (Scan.is_stopper char_stopper c));
berghofe@41561
    91
berghofe@41561
    92
(* Scanner that accepts exactly the given sequence of characters, one after
   the other, and returns the matched positioned characters. *)
fun prfx strs =
  (case strs of
    [] => Scan.succeed []
  | s :: rest => Scan.one (fn c => symbol c = s) ::: prfx rest);
berghofe@41561
    94
berghofe@41561
    95
(* Scanner that accepts the literal string s, character by character. *)
fun $$$ s = prfx (raw_explode s);
berghofe@41561
    96
berghofe@41561
    97
(* Lexicon of all keywords and operator symbols recognized by Scan.literal. *)
val lexicon =
  let
    val keywords =
      ["rule_family", "For", ":", "[", "]", "(", ")", ",", "&", ";", "=", ".", "..",
       "requires", "may_be_replaced_by", "may_be_deduced", "may_be_deduced_from",
       "are_interchangeable", "if", "end", "function", "procedure", "type", "var",
       "const", "array", "record", ":=", "of", "**", "*", "/", "div", "mod", "+", "-",
       "<>", "<", ">", "<=", ">=", "<->", "->", "not", "and", "or", "for_some",
       "for_all", "***", "!!!", "element", "update", "pending"];
  in Scan.make_lexicon (map raw_explode keywords) end;
berghofe@41561
   151
berghofe@41561
   152
(* Scanner for one specific keyword: take the literal that Scan.literal finds
   in the lexicon and succeed only if its text is exactly s. *)
fun keyword s =
  let
    val expected = raw_explode s;
    fun check matched =
      if map symbol matched = expected then Scan.succeed matched else Scan.fail;
  in Scan.literal lexicon :|-- check end;
berghofe@41561
   154
berghofe@41561
   155
(* True iff x is a single character in the range "0" .. "9".
   The size check is needed because string comparison is lexicographic:
   without it, multi-character strings such as "10" or "5a" would also lie
   between "0" and "9" and be accepted as a digit. *)
fun is_digit x = size x = 1 andalso "0" <= x andalso x <= "9";
berghofe@41561
   156
(* True iff x is a single ASCII letter ("a" .. "z" or "A" .. "Z").
   The size check is needed because string comparison is lexicographic:
   without it, multi-character strings such as "ab" would be accepted. *)
fun is_alpha x =
  size x = 1 andalso
    ("a" <= x andalso x <= "z" orelse "A" <= x andalso x <= "Z");
berghofe@41561
   157
(* Single-character classification predicates. *)
fun is_underscore c = (c = "_");
fun is_tilde c = (c = "~");
fun is_newline c = (c = "\n");
fun is_tab c = (c = "\t");
fun is_space c = (c = " ");

(* Blank, tab or newline. *)
fun is_whitespace c = is_space c orelse is_tab c orelse is_newline c;

(* Blank or tab only, i.e. whitespace that does not end the line. *)
fun is_whitespace' c = is_space c orelse is_tab c;
berghofe@41561
   164
berghofe@41561
   165
(* A non-empty sequence of digits. *)
val number = Scan.many1 (fn c => is_digit (symbol c));
berghofe@41561
   166
berghofe@41561
   167
(* An identifier: a letter, followed by any number of letters, digits or
   underscores, optionally followed by a single "~". *)
val identifier =
  let
    val first = Scan.one (is_alpha o symbol);
    fun is_ident_char c = is_alpha c orelse is_digit c orelse is_underscore c;
    val rest = Scan.many (is_ident_char o symbol);
    val tilde = Scan.optional (Scan.one (is_tilde o symbol) >> single) [];
  in first ::: (rest @@@ tilde) end;
berghofe@41561
   172
berghofe@41561
   173
(* A long identifier: an identifier followed by one or more "." identifier
   parts; the dots are kept in the result. *)
val long_identifier =
  let
    val dotted_part = $$$ "." @@@ identifier;
  in identifier @@@ (Scan.repeat1 dotted_part >> flat) end;
berghofe@41561
   175
berghofe@41561
   176
(* Possibly empty run of blanks, tabs and newlines. *)
val whitespace = Scan.many (fn c => is_whitespace (symbol c));

(* Non-empty run of blanks, tabs and newlines. *)
val whitespace1 = Scan.many1 (fn c => is_whitespace (symbol c));

(* Possibly empty run of blanks and tabs (stays on the current line). *)
val whitespace' = Scan.many (fn c => is_whitespace' (symbol c));

(* A single newline character. *)
val newline = Scan.one (fn c => is_newline (symbol c));
berghofe@41561
   180
berghofe@41561
   181
(* Short excerpt of the character list cs for error messages: trailing
   whitespace is removed, at most n characters are kept (again without
   trailing whitespace), whitespace characters are shown as blanks, and
   " ..." is appended when more than n characters were available. *)
fun beginning n cs =
  let
    fun strip_trailing xs = fst (take_suffix is_whitespace xs);
    val trimmed = strip_trailing cs;
    val excerpt = strip_trailing (take n trimmed);
    fun blank c = if is_whitespace c then " " else c;
    val suffix = if length trimmed > n then " ..." else "";
  in implode (map blank excerpt) ^ suffix end;
berghofe@41561
   191
berghofe@41561
   192
(* Wrap scan with Scan.!! so that a failure yields an error message made of
   text, the position of the remaining input, an excerpt of that input and,
   on a new line, the message of the underlying failure (if any). *)
fun !!! text scan =
  let
    fun where_at syms =
      (case syms of
        [] => " (end-of-input)"
      | (_, pos) :: _ => Position.str_of pos);

    fun nested NONE = ""
      | nested (SOME m) = "\n" ^ m ();

    fun err (syms, msg) () =
      text ^ where_at syms ^ " at " ^ beginning 10 (map symbol syms) ^ nested msg;
  in Scan.!! err scan end;
berghofe@41561
   201
berghofe@41561
   202
(* All characters up to, but not including, the next newline or the
   end-of-input marker. *)
val any_line' =
  Scan.many (fn c => not (Scan.is_stopper char_stopper c orelse is_newline (symbol c)));
berghofe@41561
   204
berghofe@41561
   205
(* One complete line as a string: leading blanks/tabs and the terminating
   newline are consumed but not part of the result. *)
val any_line =
  let
    val line_body = whitespace' |-- any_line';
    fun text_of cs = implode (map symbol cs);
  in (line_body --| newline) >> text_of end;
berghofe@41561
   207
berghofe@41561
   208
(* Comment delimited by the strings a and b.  The resulting Comment token
   holds the characters between the delimiters; a missing closing delimiter
   is reported as "missing end of comment". *)
fun gen_comment a b =
  let
    val opening = $$$ a;
    val closing = $$$ b;
    val body = Scan.repeat (Scan.unless closing any) --| closing;
  in (opening |-- !!! "missing end of comment" body) >> make_token Comment end;
berghofe@41561
   210
berghofe@41561
   211
(* Comment of the form /* ... */ *)
fun c_comment cs = gen_comment "/*" "*/" cs;

(* Comment of the form { ... } *)
fun curly_comment cs = gen_comment "{" "}" cs;

(* Comment starting with "%" and extending up to (not including) the end of
   the line. *)
fun percent_comment cs = (($$$ "%" |-- any_line') >> make_token Comment) cs;
berghofe@41561
   215
berghofe@41561
   216
(* Scanner that accepts exactly n consecutive items satisfying p.
   For n <= 0 it succeeds with the empty list; treating negative counts like
   zero prevents the recursion from running forever, since a negative n
   would never reach 0 by counting down. *)
fun repeatn n p =
  if n <= 0 then Scan.succeed []
  else Scan.one p ::: repeatn (n - 1) p;
berghofe@41561
   218
berghofe@41561
   219
berghofe@41561
   220
(** header of *.vcg and *.siv files (see simplifier/load_provenance.pro) **)
berghofe@41561
   221
berghofe@41561
   222
(* The three text lines found between the two asterisk rules of a banner
   (see the banner scanner). *)
type banner = string * string * string;
berghofe@41561
   223
(* The three components produced by the date scanner: two digits, three
   letters and four digits -- presumably day, month name and year. *)
type date = string * string * string;
berghofe@41561
   224
(* The components produced by the time scanner: three two-digit fields
   separated by ":" and an optional two-digit field after "." -- presumably
   hours, minutes, seconds and a fractional part. *)
type time = string * string * string * string option;
berghofe@41561
   225
berghofe@41561
   226
(* A non-empty run of "*" characters. *)
val asterisks = Scan.repeat1 (Scan.one (fn c => symbol c = "*"));
berghofe@41561
   227
berghofe@41561
   228
local
  (* Concatenate the text of a list of positioned characters. *)
  fun text_of cs = implode (map symbol cs);
in

(* Exactly n letters, returned as a string. *)
fun alphan n = repeatn n (is_alpha o symbol) >> text_of;

(* Exactly n digits, returned as a string. *)
fun digitn n = repeatn n (is_digit o symbol) >> text_of;

end;
berghofe@41561
   230
berghofe@41561
   231
(* Time stamp of the form DD:DD:DD with an optional .DD suffix (D = digit);
   the result is the tuple of the four fields, the last one optional. *)
val time =
  let
    val two = digitn 2;
    val colon = $$$ ":";
    val fraction = Scan.option ($$$ "." |-- two);
    fun pack (((hr, mi), sec), frac) = (hr, mi, sec, frac);
  in (two --| colon -- two --| colon -- two -- fraction) >> pack end;
berghofe@41561
   235
berghofe@41561
   236
(* Date of the form DD-AAA-DDDD (D = digit, A = letter); the result is the
   triple of the three fields. *)
val date =
  let
    val dash = $$$ "-";
    fun pack ((d, m), y) = (d, m, y);
  in (digitn 2 --| dash -- alphan 3 --| dash -- digitn 4) >> pack end;
berghofe@41561
   239
berghofe@41561
   240
(* Banner: a line of asterisks, three lines of text, and a closing line that
   repeats exactly the same asterisks.  Blanks and tabs around the asterisk
   lines are ignored.  The result is the triple of the three text lines. *)
val banner =
  let
    val rule = whitespace' |-- asterisks --| whitespace' --| newline;
    val three_lines =
      any_line -- any_line -- any_line >> (fn ((l1, l2), l3) => (l1, l2, l3));
    fun rest stars =
      three_lines --| whitespace' --| prfx (map symbol stars) --| whitespace' --| newline;
  in rule :|-- rest end;
berghofe@41561
   245
berghofe@41561
   246
(* Header of *.vcg files: a banner, optionally followed by "DATE :" with a
   date and a time; the label "TIME :" in front of the time is optional. *)
val vcg_header =
  let
    val date_label = whitespace |-- $$$ "DATE :" |-- whitespace;
    val time_label = Scan.option ($$$ "TIME :" -- whitespace);
    val stamp = date_label |-- date --| whitespace --| time_label -- time;
  in banner -- Scan.option stamp end;
berghofe@41561
   249
berghofe@41561
   250
(* Header of *.siv files: a banner, a "CREATED" date/time stamp, a
   "SIMPLIFIED" date/time stamp, two newlines and two further lines of text.
   The result is (banner, created, simplified, (line1, line2)). *)
val siv_header =
  let
    val stamp = date --| $$$ "," --| whitespace -- time;
    fun labelled name = $$$ name |-- whitespace |-- stamp;
    fun pack (((b, created), simplified), ls) = (b, created, simplified, ls);
  in
    (banner --| whitespace --
      labelled "CREATED" --| whitespace --
      labelled "SIMPLIFIED" --| newline --| newline --
      (any_line -- any_line)) >> pack
  end;
berghofe@41561
   256
berghofe@41561
   257
berghofe@41561
   258
(** the main tokenizer **)
berghofe@41561
   259
berghofe@41561
   260
(* Scanner for a complete file: the header (failure reported as "bad header"),
   then tokens up to the end-of-input marker.  Each token is tried in this
   order: a comment; a traceability item "For ... :"; the longer of a lexicon
   literal and a (long) identifier; a number.  Failure of all alternatives is
   reported as "bad input".  Whitespace after each token is skipped. *)
fun scan header comment =
  let
    val traceability =
      (keyword "For" -- whitespace1) |--
        Scan.repeat1 (Scan.unless (keyword ":") any) --|
        keyword ":" >> make_token Traceability;
    val keyword_tok = Scan.literal lexicon >> make_token Keyword;
    val ident_tok =
      long_identifier >> make_token Long_Ident ||
      identifier >> make_token Ident;
    val word = Scan.max leq_token keyword_tok ident_tok;
    val number_tok = number >> make_token Number;
    val token =
      !!! "bad input" (comment || traceability || word || number_tok) --| whitespace;
  in
    !!! "bad header" header --| whitespace --
      Scan.repeat (Scan.unless (Scan.one is_char_eof) token)
  end;
berghofe@41561
   274
berghofe@41561
   275
(* Tokenize the string s, whose first character is at position pos, using the
   given header and comment scanners.  Returns the header result together
   with the list of tokens; scanner failures are turned into errors by
   Scan.error. *)
fun tokenize header comment pos s =
  let
    val input = explode_pos s pos;
    val scanner = Scan.error (scan header comment);
    val (result, _) = Scan.finite char_stopper scanner input;
  in result end;
berghofe@41561
   278
berghofe@41561
   279
end;