|
1 (* Title: Pure/Thy/thy_scan.ML |
|
2 ID: $Id$ |
|
3 Author: Markus Wenzel, TU Muenchen |
|
4 |
|
5 The scanner for theory files. |
|
6 *) |
|
7 |
|
8 signature THY_SCAN = |
|
9 sig |
|
10 datatype token_kind = |
|
11 Keyword | Ident | LongIdent | TypeVar | Nat | String | Verbatim | EOF |
|
12 val name_of_kind: token_kind -> string |
|
13 type lexicon |
|
14 val make_lexicon: string list -> lexicon |
|
15 val tokenize: lexicon -> string -> (token_kind * string * int) list |
|
16 end; |
|
17 |
|
18 |
|
19 functor ThyScanFun(Scanner: SCANNER): THY_SCAN = |
|
20 struct |
|
21 |
|
22 open Scanner; |
|
23 |
|
24 |
|
25 (** token kinds **) |
|
26 |
|
27 datatype token_kind = |
|
28 Keyword | Ident | LongIdent | TypeVar | Nat | String | Verbatim | EOF; |
|
29 |
|
30 fun name_of_kind Keyword = "keyword" |
|
31 | name_of_kind Ident = "identifier" |
|
32 | name_of_kind LongIdent = "long identifier" |
|
33 | name_of_kind TypeVar = "type variable" |
|
34 | name_of_kind Nat = "natural number" |
|
35 | name_of_kind String = "string" |
|
36 | name_of_kind Verbatim = "verbatim text" |
|
37 | name_of_kind EOF = "end-of-file"; |
|
38 |
|
39 |
|
40 |
|
41 (** scanners **) |
|
42 |
|
43 fun lex_err line msg = |
|
44 error ("Lexical error on line " ^ string_of_int line ^ ": " ^ msg); |
|
45 |
|
46 |
|
47 (* misc utilities *) |
|
48 |
|
49 fun count_lines cs = |
|
50 foldl (fn (sum, c) => if c = "\n" then sum + 1 else sum) (0, cs); |
|
51 |
|
52 fun inc_line n s = n + count_lines (explode s); |
|
53 |
|
54 fun cons_fst c (cs, x, y) = (c :: cs, x, y); |
|
55 |
|
56 |
|
57 (* scan strings *) |
|
58 |
|
59 val scan_ctrl = $$ "^" ^^ scan_one (fn c => ord c >= 64 andalso ord c <= 95); |
|
60 |
|
61 val scan_dig = scan_one is_digit; |
|
62 |
|
63 val scan_blanks1 = scan_any1 is_blank >> implode; |
|
64 |
|
65 val scan_esc = |
|
66 $$ "n" || $$ "t" || $$ "\"" || $$ "\\" || |
|
67 scan_ctrl || scan_dig ^^ scan_dig ^^ scan_dig || |
|
68 scan_blanks1 ^^ $$ "\\"; |
|
69 |
|
70 |
|
71 fun string ("\"" :: cs) n = (["\""], cs, n) |
|
72 | string ("\\" :: cs) n = |
|
73 (case optional scan_esc cs of |
|
74 (None, _) => lex_err n "bad escape sequence in string" |
|
75 | (Some s, cs') => cons_fst ("\\" ^ s) (string cs' (inc_line n s))) |
|
76 | string (c :: cs) n = cons_fst c (string cs n) |
|
77 | string [] n = lex_err n "missing quote at end of string"; |
|
78 |
|
79 |
|
80 (* scan verbatim text *) |
|
81 |
|
82 fun verbatim ("|" :: "}" :: cs) n = ([], cs, n) |
|
83 | verbatim (c :: cs) n = cons_fst c (verbatim cs (inc_line n c)) |
|
84 | verbatim [] n = lex_err n "missing end of verbatim text"; |
|
85 |
|
86 |
|
87 (* scan nested comments *) |
|
88 |
|
89 fun comment ("*" :: ")" :: cs) n 1 = (cs, n) |
|
90 | comment ("*" :: ")" :: cs) n d = comment cs n (d - 1) |
|
91 | comment ("(" :: "*" :: cs) n d = comment cs n (d + 1) |
|
92 | comment (c :: cs) n d = comment cs (inc_line n c) d |
|
93 | comment [] n _ = lex_err n "missing end of comment"; |
|
94 |
|
95 |
|
96 |
|
97 (** tokenize **) |
|
98 |
|
99 val scan_longid = scan_id ^^ (repeat1 ($$ "." ^^ scan_id) >> implode); |
|
100 |
|
101 fun scan_word lex = |
|
102 max_of |
|
103 (scan_literal lex >> pair Keyword) |
|
104 (scan_longid >> pair LongIdent || |
|
105 scan_id >> pair Ident || |
|
106 scan_tid >> pair TypeVar || |
|
107 scan_nat >> pair Nat); |
|
108 |
|
109 fun add_tok toks kind n (cs, cs', n') = |
|
110 ((kind, implode cs, n) :: toks, cs', n'); |
|
111 |
|
112 val take_line = implode o fst o take_prefix (not_equal "\n"); |
|
113 |
|
114 |
|
115 fun tokenize lex str = |
|
116 let |
|
117 fun scan (toks, [], n) = rev ((EOF, "end-of-file", n) :: toks) |
|
118 | scan (toks, "\"" :: cs, n) = |
|
119 scan (add_tok toks String n (cons_fst "\"" (string cs n))) |
|
120 | scan (toks, "{" :: "|" :: cs, n) = |
|
121 scan (add_tok toks Verbatim n (verbatim cs n)) |
|
122 | scan (toks, "(" :: "*" :: cs, n) = |
|
123 scan ((fn (cs', n') => (toks, cs', n')) (comment cs n 1)) |
|
124 | scan (toks, chs as c :: cs, n) = |
|
125 if is_blank c then scan (toks, cs, inc_line n c) |
|
126 else |
|
127 (case scan_word lex chs of |
|
128 (None, _) => lex_err n (quote (take_line chs)) |
|
129 | (Some (Keyword, "ML"), chs') => scan ((Verbatim, implode chs', n) |
|
130 :: (Keyword, "ML", n) :: toks, [], n + count_lines chs') |
|
131 | (Some (tk, s), chs') => scan ((tk, s, n) :: toks, chs', n)); |
|
132 in |
|
133 scan ([], explode str, 1) |
|
134 end; |
|
135 |
|
136 |
|
137 end; |
|
138 |