388
|
1 |
(* Title: Pure/Thy/thy_scan.ML
|
|
2 |
ID: $Id$
|
|
3 |
Author: Markus Wenzel, TU Muenchen
|
|
4 |
|
|
5 |
The scanner for theory files.
|
|
6 |
*)
|
|
7 |
|
|
8 |
signature THY_SCAN =
|
|
9 |
sig
|
|
10 |
datatype token_kind =
|
|
11 |
Keyword | Ident | LongIdent | TypeVar | Nat | String | Verbatim | EOF
|
|
12 |
val name_of_kind: token_kind -> string
|
|
13 |
type lexicon
|
|
14 |
val make_lexicon: string list -> lexicon
|
|
15 |
val tokenize: lexicon -> string -> (token_kind * string * int) list
|
|
16 |
end;
|
|
17 |
|
|
18 |
|
|
19 |
functor ThyScanFun(Scanner: SCANNER): THY_SCAN =
|
|
20 |
struct
|
|
21 |
|
|
22 |
open Scanner;
|
|
23 |
|
|
24 |
|
|
25 |
(** token kinds **)
|
|
26 |
|
|
27 |
datatype token_kind =
|
|
28 |
Keyword | Ident | LongIdent | TypeVar | Nat | String | Verbatim | EOF;
|
|
29 |
|
|
30 |
fun name_of_kind Keyword = "keyword"
|
|
31 |
| name_of_kind Ident = "identifier"
|
|
32 |
| name_of_kind LongIdent = "long identifier"
|
|
33 |
| name_of_kind TypeVar = "type variable"
|
|
34 |
| name_of_kind Nat = "natural number"
|
|
35 |
| name_of_kind String = "string"
|
|
36 |
| name_of_kind Verbatim = "verbatim text"
|
|
37 |
| name_of_kind EOF = "end-of-file";
|
|
38 |
|
|
39 |
|
|
40 |
|
|
41 |
(** scanners **)
|
|
42 |
|
|
43 |
fun lex_err line msg =
|
|
44 |
error ("Lexical error on line " ^ string_of_int line ^ ": " ^ msg);
|
|
45 |
|
|
46 |
|
|
47 |
(* misc utilities *)
|
|
48 |
|
|
49 |
fun count_lines cs =
|
|
50 |
foldl (fn (sum, c) => if c = "\n" then sum + 1 else sum) (0, cs);
|
|
51 |
|
|
52 |
fun inc_line n s = n + count_lines (explode s);
|
|
53 |
|
|
54 |
fun cons_fst c (cs, x, y) = (c :: cs, x, y);
|
|
55 |
|
|
56 |
|
|
57 |
(* scan strings *)
|
|
58 |
|
|
59 |
val scan_ctrl = $$ "^" ^^ scan_one (fn c => ord c >= 64 andalso ord c <= 95);
|
|
60 |
|
|
61 |
val scan_dig = scan_one is_digit;
|
|
62 |
|
|
63 |
val scan_blanks1 = scan_any1 is_blank >> implode;
|
|
64 |
|
|
65 |
val scan_esc =
|
|
66 |
$$ "n" || $$ "t" || $$ "\"" || $$ "\\" ||
|
|
67 |
scan_ctrl || scan_dig ^^ scan_dig ^^ scan_dig ||
|
|
68 |
scan_blanks1 ^^ $$ "\\";
|
|
69 |
|
|
70 |
|
|
71 |
fun string ("\"" :: cs) n = (["\""], cs, n)
|
|
72 |
| string ("\\" :: cs) n =
|
|
73 |
(case optional scan_esc cs of
|
|
74 |
(None, _) => lex_err n "bad escape sequence in string"
|
|
75 |
| (Some s, cs') => cons_fst ("\\" ^ s) (string cs' (inc_line n s)))
|
|
76 |
| string (c :: cs) n = cons_fst c (string cs n)
|
|
77 |
| string [] n = lex_err n "missing quote at end of string";
|
|
78 |
|
|
79 |
|
|
80 |
(* scan verbatim text *)
|
|
81 |
|
|
82 |
fun verbatim ("|" :: "}" :: cs) n = ([], cs, n)
|
|
83 |
| verbatim (c :: cs) n = cons_fst c (verbatim cs (inc_line n c))
|
|
84 |
| verbatim [] n = lex_err n "missing end of verbatim text";
|
|
85 |
|
|
86 |
|
|
87 |
(* scan nested comments *)
|
|
88 |
|
|
89 |
fun comment ("*" :: ")" :: cs) n 1 = (cs, n)
|
|
90 |
| comment ("*" :: ")" :: cs) n d = comment cs n (d - 1)
|
|
91 |
| comment ("(" :: "*" :: cs) n d = comment cs n (d + 1)
|
|
92 |
| comment (c :: cs) n d = comment cs (inc_line n c) d
|
|
93 |
| comment [] n _ = lex_err n "missing end of comment";
|
|
94 |
|
|
95 |
|
|
96 |
|
|
97 |
(** tokenize **)
|
|
98 |
|
|
99 |
val scan_longid = scan_id ^^ (repeat1 ($$ "." ^^ scan_id) >> implode);
|
|
100 |
|
|
101 |
fun scan_word lex =
|
|
102 |
max_of
|
|
103 |
(scan_literal lex >> pair Keyword)
|
|
104 |
(scan_longid >> pair LongIdent ||
|
|
105 |
scan_id >> pair Ident ||
|
|
106 |
scan_tid >> pair TypeVar ||
|
|
107 |
scan_nat >> pair Nat);
|
|
108 |
|
|
109 |
fun add_tok toks kind n (cs, cs', n') =
|
|
110 |
((kind, implode cs, n) :: toks, cs', n');
|
|
111 |
|
|
112 |
val take_line = implode o fst o take_prefix (not_equal "\n");
|
|
113 |
|
|
114 |
|
|
115 |
fun tokenize lex str =
|
|
116 |
let
|
|
117 |
fun scan (toks, [], n) = rev ((EOF, "end-of-file", n) :: toks)
|
|
118 |
| scan (toks, "\"" :: cs, n) =
|
|
119 |
scan (add_tok toks String n (cons_fst "\"" (string cs n)))
|
|
120 |
| scan (toks, "{" :: "|" :: cs, n) =
|
|
121 |
scan (add_tok toks Verbatim n (verbatim cs n))
|
|
122 |
| scan (toks, "(" :: "*" :: cs, n) =
|
|
123 |
scan ((fn (cs', n') => (toks, cs', n')) (comment cs n 1))
|
|
124 |
| scan (toks, chs as c :: cs, n) =
|
|
125 |
if is_blank c then scan (toks, cs, inc_line n c)
|
|
126 |
else
|
|
127 |
(case scan_word lex chs of
|
|
128 |
(None, _) => lex_err n (quote (take_line chs))
|
|
129 |
| (Some (Keyword, "ML"), chs') => scan ((Verbatim, implode chs', n)
|
|
130 |
:: (Keyword, "ML", n) :: toks, [], n + count_lines chs')
|
|
131 |
| (Some (tk, s), chs') => scan ((tk, s, n) :: toks, chs', n));
|
|
132 |
in
|
|
133 |
scan ([], explode str, 1)
|
|
134 |
end;
|
|
135 |
|
|
136 |
|
|
137 |
end;
|
|
138 |
|