29 val is_comment: token -> bool |
33 val is_comment: token -> bool |
30 val is_begin_ignore: token -> bool |
34 val is_begin_ignore: token -> bool |
31 val is_end_ignore: token -> bool |
35 val is_end_ignore: token -> bool |
32 val is_blank: token -> bool |
36 val is_blank: token -> bool |
33 val is_newline: token -> bool |
37 val is_newline: token -> bool |
34 val val_of: token -> string |
|
35 val source_of: token -> string |
38 val source_of: token -> string |
|
39 val content_of: token -> string |
36 val unparse: token -> string |
40 val unparse: token -> string |
37 val text_of: token -> string * string |
41 val text_of: token -> string * string |
38 val is_sid: string -> bool |
42 val get_value: token -> value option |
|
43 val map_value: (value -> value) -> token -> token |
|
44 val mk_text: string -> token |
|
45 val mk_typ: typ -> token |
|
46 val mk_term: term -> token |
|
47 val mk_fact: thm list -> token |
|
48 val mk_attribute: (morphism -> attribute) -> token |
|
49 val assignable: token -> token |
|
50 val assign: value option -> token -> unit |
|
51 val closure: token -> token |
|
52 val ident_or_symbolic: string -> bool |
39 val !!! : string -> (SymbolPos.T list -> 'a) -> SymbolPos.T list -> 'a |
53 val !!! : string -> (SymbolPos.T list -> 'a) -> SymbolPos.T list -> 'a |
40 val scan_quoted: SymbolPos.T list -> SymbolPos.T list * SymbolPos.T list |
54 val scan_quoted: SymbolPos.T list -> SymbolPos.T list * SymbolPos.T list |
41 val source_proper: (token, 'a) Source.source -> (token, (token, 'a) Source.source) Source.source |
55 val source_proper: (token, 'a) Source.source -> (token, (token, 'a) Source.source) Source.source |
42 val source': bool Option.option -> (unit -> Scan.lexicon * Scan.lexicon) -> |
56 val source': bool Option.option -> (unit -> Scan.lexicon * Scan.lexicon) -> |
43 (SymbolPos.T, 'a) Source.source -> (token, (SymbolPos.T, 'a) Source.source) Source.source |
57 (SymbolPos.T, 'a) Source.source -> (token, (SymbolPos.T, 'a) Source.source) Source.source |
72 | String => "string" |
107 | String => "string" |
73 | AltString => "back-quoted string" |
108 | AltString => "back-quoted string" |
74 | Verbatim => "verbatim text" |
109 | Verbatim => "verbatim text" |
75 | Space => "white space" |
110 | Space => "white space" |
76 | Comment => "comment text" |
111 | Comment => "comment text" |
|
112 | InternalValue => "internal value" |
77 | Malformed => "malformed symbolic character" |
113 | Malformed => "malformed symbolic character" |
78 | Error _ => "bad input" |
114 | Error _ => "bad input" |
79 | Sync => "sync marker" |
115 | Sync => "sync marker" |
80 | EOF => "end-of-file"; |
116 | EOF => "end-of-file"; |
81 |
117 |
82 |
118 |
83 (* position *) |
119 (* position *) |
84 |
120 |
85 fun position_of (Token ((_, (pos, _)), _)) = pos; |
121 fun position_of (Token ((_, (pos, _)), _, _)) = pos; |
86 fun end_position_of (Token ((_, (_, pos)), _)) = pos; |
122 fun end_position_of (Token ((_, (_, pos)), _, _)) = pos; |
87 |
123 |
88 val pos_of = Position.str_of o position_of; |
124 val pos_of = Position.str_of o position_of; |
89 |
125 |
90 |
126 |
91 (* control tokens *) |
127 (* control tokens *) |
92 |
128 |
93 fun mk_eof pos = Token (("", (pos, Position.none)), (EOF, "")); |
129 fun mk_eof pos = Token (("", (pos, Position.none)), (EOF, ""), Slot); |
94 val eof = mk_eof Position.none; |
130 val eof = mk_eof Position.none; |
95 |
131 |
96 fun is_eof (Token (_, (EOF, _))) = true |
132 fun is_eof (Token (_, (EOF, _), _)) = true |
97 | is_eof _ = false; |
133 | is_eof _ = false; |
98 |
134 |
99 val not_eof = not o is_eof; |
135 val not_eof = not o is_eof; |
100 |
136 |
101 fun not_sync (Token (_, (Sync, _))) = false |
137 fun not_sync (Token (_, (Sync, _), _)) = false |
102 | not_sync _ = true; |
138 | not_sync _ = true; |
103 |
139 |
104 val stopper = |
140 val stopper = |
105 Scan.stopper (fn [] => eof | toks => mk_eof (end_position_of (List.last toks))) is_eof; |
141 Scan.stopper (fn [] => eof | toks => mk_eof (end_position_of (List.last toks))) is_eof; |
106 |
142 |
107 |
143 |
108 (* kind of token *) |
144 (* kind of token *) |
109 |
145 |
110 fun kind_of (Token (_, (k, _))) = k; |
146 fun kind_of (Token (_, (k, _), _)) = k; |
111 fun is_kind k (Token (_, (k', _))) = k = k'; |
147 fun is_kind k (Token (_, (k', _), _)) = k = k'; |
112 |
148 |
113 fun keyword_with pred (Token (_, (Keyword, x))) = pred x |
149 fun keyword_with pred (Token (_, (Keyword, x), _)) = pred x |
114 | keyword_with _ _ = false; |
150 | keyword_with _ _ = false; |
115 |
151 |
116 fun ident_with pred (Token (_, (Ident, x))) = pred x |
152 fun ident_with pred (Token (_, (Ident, x), _)) = pred x |
117 | ident_with _ _ = false; |
153 | ident_with _ _ = false; |
118 |
154 |
119 fun is_proper (Token (_, (Space, _))) = false |
155 fun is_proper (Token (_, (Space, _), _)) = false |
120 | is_proper (Token (_, (Comment, _))) = false |
156 | is_proper (Token (_, (Comment, _), _)) = false |
121 | is_proper _ = true; |
157 | is_proper _ = true; |
122 |
158 |
123 fun is_semicolon (Token (_, (Keyword, ";"))) = true |
159 fun is_semicolon (Token (_, (Keyword, ";"), _)) = true |
124 | is_semicolon _ = false; |
160 | is_semicolon _ = false; |
125 |
161 |
126 fun is_comment (Token (_, (Comment, _))) = true |
162 fun is_comment (Token (_, (Comment, _), _)) = true |
127 | is_comment _ = false; |
163 | is_comment _ = false; |
128 |
164 |
129 fun is_begin_ignore (Token (_, (Comment, "<"))) = true |
165 fun is_begin_ignore (Token (_, (Comment, "<"), _)) = true |
130 | is_begin_ignore _ = false; |
166 | is_begin_ignore _ = false; |
131 |
167 |
132 fun is_end_ignore (Token (_, (Comment, ">"))) = true |
168 fun is_end_ignore (Token (_, (Comment, ">"), _)) = true |
133 | is_end_ignore _ = false; |
169 | is_end_ignore _ = false; |
134 |
170 |
135 |
171 |
136 (* blanks and newlines -- space tokens obey lines *) |
172 (* blanks and newlines -- space tokens obey lines *) |
137 |
173 |
138 fun is_blank (Token (_, (Space, x))) = not (String.isSuffix "\n" x) |
174 fun is_blank (Token (_, (Space, x), _)) = not (String.isSuffix "\n" x) |
139 | is_blank _ = false; |
175 | is_blank _ = false; |
140 |
176 |
141 fun is_newline (Token (_, (Space, x))) = String.isSuffix "\n" x |
177 fun is_newline (Token (_, (Space, x), _)) = String.isSuffix "\n" x |
142 | is_newline _ = false; |
178 | is_newline _ = false; |
143 |
179 |
144 |
180 |
145 (* token content *) |
181 (* token content *) |
146 |
182 |
147 fun val_of (Token (_, (_, x))) = x; |
183 fun source_of (Token ((source, (pos, _)), _, _)) = |
148 |
|
149 fun source_of (Token ((source, (pos, _)), _)) = |
|
150 YXML.string_of (XML.Elem (Markup.tokenN, Position.properties_of pos, [XML.Text source])); |
184 YXML.string_of (XML.Elem (Markup.tokenN, Position.properties_of pos, [XML.Text source])); |
|
185 |
|
186 fun content_of (Token (_, (_, x), _)) = x; |
151 |
187 |
152 |
188 |
153 (* unparse *) |
189 (* unparse *) |
154 |
190 |
155 fun escape q = |
191 fun escape q = |
156 implode o map (fn s => if s = q orelse s = "\\" then "\\" ^ s else s) o Symbol.explode; |
192 implode o map (fn s => if s = q orelse s = "\\" then "\\" ^ s else s) o Symbol.explode; |
157 |
193 |
158 fun unparse (Token (_, (kind, x))) = |
194 fun unparse (Token (_, (kind, x), _)) = |
159 (case kind of |
195 (case kind of |
160 String => x |> quote o escape "\"" |
196 String => x |> quote o escape "\"" |
161 | AltString => x |> enclose "`" "`" o escape "`" |
197 | AltString => x |> enclose "`" "`" o escape "`" |
162 | Verbatim => x |> enclose "{*" "*}" |
198 | Verbatim => x |> enclose "{*" "*}" |
163 | Comment => x |> enclose "(*" "*)" |
199 | Comment => x |> enclose "(*" "*)" |
170 if is_semicolon tok then ("terminator", "") |
206 if is_semicolon tok then ("terminator", "") |
171 else |
207 else |
172 let |
208 let |
173 val k = str_of_kind (kind_of tok); |
209 val k = str_of_kind (kind_of tok); |
174 val s = unparse tok |
210 val s = unparse tok |
175 handle ERROR _ => Symbol.separate_chars (val_of tok); |
211 handle ERROR _ => Symbol.separate_chars (content_of tok); |
176 in |
212 in |
177 if s = "" then (k, "") |
213 if s = "" then (k, "") |
178 else if size s < 40 andalso not (exists_string (fn c => c = "\n") s) then (k ^ " " ^ s, "") |
214 else if size s < 40 andalso not (exists_string (fn c => c = "\n") s) then (k ^ " " ^ s, "") |
179 else (k, s) |
215 else (k, s) |
180 end; |
216 end; |
181 |
217 |
182 |
218 |
183 |
219 |
|
220 (** associated values **) |
|
221 |
|
222 (* access values *) |
|
223 |
|
224 fun get_value (Token (_, _, Value v)) = v |
|
225 | get_value _ = NONE; |
|
226 |
|
227 fun map_value f (Token (x, y, Value (SOME v))) = Token (x, y, Value (SOME (f v))) |
|
228 | map_value _ tok = tok; |
|
229 |
|
230 |
|
231 (* make values *) |
|
232 |
|
233 fun mk_value k v = Token ((k, Position.no_range), (InternalValue, k), Value (SOME v)); |
|
234 |
|
235 val mk_text = mk_value "<text>" o Text; |
|
236 val mk_typ = mk_value "<typ>" o Typ; |
|
237 val mk_term = mk_value "<term>" o Term; |
|
238 val mk_fact = mk_value "<fact>" o Fact; |
|
239 val mk_attribute = mk_value "<attribute>" o Attribute; |
|
240 |
|
241 |
|
242 (* static binding *) |
|
243 |
|
244 (*1st stage: make empty slots assignable*) |
|
245 fun assignable (Token (x, y, Slot)) = Token (x, y, Assignable (ref NONE)) |
|
246 | assignable tok = tok; |
|
247 |
|
248 (*2nd stage: assign values as side-effect of scanning*) |
|
249 fun assign v (Token (_, _, Assignable r)) = r := v |
|
250 | assign _ _ = (); |
|
251 |
|
252 (*3rd stage: static closure of final values*) |
|
253 fun closure (Token (x, y, Assignable (ref v))) = Token (x, y, Value v) |
|
254 | closure tok = tok; |
|
255 |
|
256 |
|
257 |
184 (** scanners **) |
258 (** scanners **) |
185 |
259 |
186 open BasicSymbolPos; |
260 open BasicSymbolPos; |
187 |
261 |
188 fun !!! msg = SymbolPos.!!! ("Outer lexical error: " ^ msg); |
262 fun !!! msg = SymbolPos.!!! ("Outer lexical error: " ^ msg); |