src/HOL/SPARK/Tools/fdl_lexer.ML
author wenzelm
Thu Aug 23 17:46:03 2012 +0200 (2012-08-23)
changeset 48911 5debc3e4fa81
parent 47297 de84dd9a9dd4
child 48992 0518bf89c777
permissions -rw-r--r--
tuned messages: end-of-input rarely means physical end-of-file from the past;
     1 (*  Title:      HOL/SPARK/Tools/fdl_lexer.ML
     2     Author:     Stefan Berghofer
     3     Copyright:  secunet Security Networks AG
     4 
     5 Lexical analyzer for fdl files.
     6 *)
     7 
     8 signature FDL_LEXER =
     9 sig
    10   type T
    11   type chars
    12   type banner
    13   type date
    14   type time
    15   datatype kind = Keyword | Ident | Long_Ident | Traceability | Number | Comment | EOF
    16   val tokenize: (chars -> 'a * chars) -> (chars -> T * chars) ->
    17     Position.T -> string -> 'a * T list
    18   val position_of: T -> Position.T
    19   val pos_of: T -> string
    20   val is_eof: T -> bool
    21   val stopper: T Scan.stopper
    22   val kind_of: T -> kind
    23   val content_of: T -> string
    24   val unparse: T -> string
    25   val is_proper: T -> bool
    26   val is_digit: string -> bool
    27   val c_comment: chars -> T * chars
    28   val curly_comment: chars -> T * chars
    29   val percent_comment: chars -> T * chars
    30   val vcg_header: chars -> (banner * (date * time) option) * chars
    31   val siv_header: chars ->
    32     (banner * (date * time) * (date * time) * (string * string)) * chars
    33 end;
    34 
    35 structure Fdl_Lexer: FDL_LEXER =
    36 struct
    37 
    38 (** tokens **)
    39 
    40 datatype kind = Keyword | Ident | Long_Ident | Traceability | Number | Comment | EOF;
    41 
    42 datatype T = Token of kind * string * Position.T;
    43 
    44 fun make_token k xs = Token (k, implode (map fst xs),
    45   case xs of [] => Position.none | (_, p) :: _ => p);
    46 
    47 fun kind_of (Token (k, _, _)) = k;
    48 
    49 fun is_proper (Token (Comment, _, _)) = false
    50   | is_proper _ = true;
    51 
    52 fun content_of (Token (_, s, _)) = s;
    53 
    54 fun unparse (Token (Traceability, s, _)) = "For " ^ s ^ ":"
    55   | unparse (Token (_, s, _)) = s;
    56 
    57 fun position_of (Token (_, _, pos)) = pos;
    58 
    59 val pos_of = Position.str_of o position_of;
    60 
    61 fun is_eof (Token (EOF, _, _)) = true
    62   | is_eof _ = false;
    63 
    64 fun mk_eof pos = Token (EOF, "", pos);
    65 val eof = mk_eof Position.none;
    66 
    67 val stopper =
    68   Scan.stopper (fn [] => eof | toks => mk_eof (position_of (List.last toks))) is_eof;
    69 
    70 fun leq_token (Token (_, s, _), Token (_, s', _)) = size s <= size s';
    71 
    72 
    73 (** split up a string into a list of characters (with positions) **)
    74 
    75 type chars = (string * Position.T) list;
    76 
    77 fun is_char_eof ("", _) = true
    78   | is_char_eof _ = false;
    79 
    80 val char_stopper = Scan.stopper (K ("", Position.none)) is_char_eof;
    81 
    82 fun symbol (x : string, _ : Position.T) = x;
    83 
    84 fun explode_pos s pos = fst (fold_map (fn x => fn pos =>
    85   ((x, pos), Position.advance x pos)) (raw_explode s) pos);
    86 
    87 
    88 (** scanners **)
    89 
    90 val any = Scan.one (not o Scan.is_stopper char_stopper);
    91 
    92 fun prfx [] = Scan.succeed []
    93   | prfx (x :: xs) = Scan.one (equal x o symbol) ::: prfx xs;
    94 
    95 val $$$ = prfx o raw_explode;
    96 
    97 val lexicon = Scan.make_lexicon (map raw_explode
    98   ["rule_family",
    99    "For",
   100    ":",
   101    "[",
   102    "]",
   103    "(",
   104    ")",
   105    ",",
   106    "&",
   107    ";",
   108    "=",
   109    ".",
   110    "..",
   111    "requires",
   112    "may_be_replaced_by",
   113    "may_be_deduced",
   114    "may_be_deduced_from",
   115    "are_interchangeable",
   116    "if",
   117    "end",
   118    "function",
   119    "procedure",
   120    "type",
   121    "var",
   122    "const",
   123    "array",
   124    "record",
   125    ":=",
   126    "of",
   127    "**",
   128    "*",
   129    "/",
   130    "div",
   131    "mod",
   132    "+",
   133    "-",
   134    "<>",
   135    "<",
   136    ">",
   137    "<=",
   138    ">=",
   139    "<->",
   140    "->",
   141    "not",
   142    "and",
   143    "or",
   144    "for_some",
   145    "for_all",
   146    "***",
   147    "!!!",
   148    "element",
   149    "update",
   150    "pending"]);
   151 
   152 fun keyword s = Scan.literal lexicon :|--
   153   (fn xs => if map symbol xs = raw_explode s then Scan.succeed xs else Scan.fail);
   154 
   155 fun is_digit x = "0" <= x andalso x <= "9";
   156 fun is_alpha x = "a" <= x andalso x <= "z" orelse "A" <= x andalso x <= "Z";
   157 val is_underscore = equal "_";
   158 val is_tilde = equal "~";
   159 val is_newline = equal "\n";
   160 val is_tab = equal "\t";
   161 val is_space = equal " ";
   162 val is_whitespace = is_space orf is_tab orf is_newline;
   163 val is_whitespace' = is_space orf is_tab;
   164 
   165 val number = Scan.many1 (is_digit o symbol);
   166 
   167 val identifier =
   168   Scan.one (is_alpha o symbol) :::
   169   Scan.many
   170     ((is_alpha orf is_digit orf is_underscore) o symbol) @@@
   171    Scan.optional (Scan.one (is_tilde o symbol) >> single) [];
   172 
   173 val long_identifier =
   174   identifier @@@ (Scan.repeat1 ($$$ "." @@@ identifier) >> flat);
   175 
   176 val whitespace = Scan.many (is_whitespace o symbol);
   177 val whitespace1 = Scan.many1 (is_whitespace o symbol);
   178 val whitespace' = Scan.many (is_whitespace' o symbol);
   179 val newline = Scan.one (is_newline o symbol);
   180 
   181 fun beginning n cs =
   182   let
   183     val drop_blanks = #1 o take_suffix is_whitespace;
   184     val all_cs = drop_blanks cs;
   185     val dots = if length all_cs > n then " ..." else "";
   186   in
   187     (drop_blanks (take n all_cs)
   188       |> map (fn c => if is_whitespace c then " " else c)
   189       |> implode) ^ dots
   190   end;
   191 
   192 fun !!! text scan =
   193   let
   194     fun get_pos [] = " (end-of-input)"
   195       | get_pos ((_, pos) :: _) = Position.str_of pos;
   196 
   197     fun err (syms, msg) = fn () =>
   198       text ^ get_pos syms ^ " at " ^ beginning 10 (map symbol syms) ^
   199         (case msg of NONE => "" | SOME m => "\n" ^ m ());
   200   in Scan.!! err scan end;
   201 
   202 val any_line' =
   203   Scan.many (not o (Scan.is_stopper char_stopper orf (is_newline o symbol)));
   204 
   205 val any_line = whitespace' |-- any_line' --|
   206   newline >> (implode o map symbol);
   207 
   208 fun gen_comment a b = $$$ a |-- !!! "missing end of comment"
   209   (Scan.repeat (Scan.unless ($$$ b) any) --| $$$ b) >> make_token Comment;
   210 
   211 val c_comment = gen_comment "/*" "*/";
   212 val curly_comment = gen_comment "{" "}";
   213 
   214 val percent_comment = $$$ "%" |-- any_line' >> make_token Comment;
   215 
   216 fun repeatn 0 _ = Scan.succeed []
   217   | repeatn n p = Scan.one p ::: repeatn (n-1) p;
   218 
   219 
   220 (** header of *.vcg and *.siv files (see simplifier/load_provenance.pro) **)
   221 
   222 type banner = string * string * string;
   223 type date = string * string * string;
   224 type time = string * string * string * string option;
   225 
   226 val asterisks = Scan.repeat1 (Scan.one (equal "*" o symbol));
   227 
   228 fun alphan n = repeatn n (is_alpha o symbol) >> (implode o map symbol);
   229 fun digitn n = repeatn n (is_digit o symbol) >> (implode o map symbol);
   230 
   231 val time =
   232   digitn 2 --| $$$ ":" -- digitn 2 --| $$$ ":" -- digitn 2 --
   233   Scan.option ($$$ "." |-- digitn 2) >>
   234     (fn (((hr, mi), s), ms) => (hr, mi, s, ms));
   235 
   236 val date =
   237   digitn 2 --| $$$ "-" -- alphan 3 --| $$$ "-" -- digitn 4 >>
   238     (fn ((d, m), y) => (d, m, y));
   239 
   240 val banner = 
   241   whitespace' |-- asterisks --| whitespace' --| newline :|-- (fn xs =>
   242     (any_line -- any_line -- any_line >>
   243        (fn ((l1, l2), l3) => (l1, l2, l3))) --|
   244     whitespace' --| prfx (map symbol xs) --| whitespace' --| newline);
   245 
   246 val vcg_header = banner -- Scan.option (whitespace |--
   247   $$$ "DATE :" |-- whitespace |-- date --| whitespace --|
   248   Scan.option ($$$ "TIME :" -- whitespace) -- time);
   249 
   250 val siv_header = banner --| whitespace --
   251   ($$$ "CREATED" |-- whitespace |-- (date --| $$$ "," --| whitespace -- time)) --|
   252   whitespace --
   253   ($$$ "SIMPLIFIED" |-- whitespace |-- (date --| $$$ "," --| whitespace -- time)) --|
   254   newline --| newline -- (any_line -- any_line) >>
   255     (fn (((b, c), s), ls) => (b, c, s, ls));
   256 
   257 
   258 (** the main tokenizer **)
   259 
   260 fun scan header comment =
   261   !!! "bad header" header --| whitespace --
   262   Scan.repeat (Scan.unless (Scan.one is_char_eof)
   263     (!!! "bad input"
   264        (   comment
   265         || (keyword "For" -- whitespace1) |--
   266               Scan.repeat1 (Scan.unless (keyword ":") any) --|
   267               keyword ":" >> make_token Traceability
   268         || Scan.max leq_token
   269              (Scan.literal lexicon >> make_token Keyword)
   270              (   long_identifier >> make_token Long_Ident
   271               || identifier >> make_token Ident)
   272         || number >> make_token Number) --|
   273      whitespace));
   274 
   275 fun tokenize header comment pos s =
   276   fst (Scan.finite char_stopper
   277     (Scan.error (scan header comment)) (explode_pos s pos));
   278 
   279 end;