author | wenzelm |
Fri, 13 Mar 2009 21:22:45 +0100 | |
changeset 30511 | 5c628a39b7cb |
parent 30339 | 3fc32dd7a647 |
child 30573 | 49899f26fbd1 |
permissions | -rw-r--r-- |
5826 | 1 |
(* Title: Pure/Isar/outer_parse.ML |
2 |
Author: Markus Wenzel, TU Muenchen |
|
3 |
||
4 |
Generic parsers for Isabelle/Isar outer syntax. |
|
5 |
*) |
|
6 |
||
7 |
signature OUTER_PARSE = |
|
8 |
sig |
|
27886 | 9 |
type token = OuterLex.token |
29310 | 10 |
type 'a parser = token list -> 'a * token list |
30511 | 11 |
type 'a context_parser = Context.generic * token list -> 'a * (Context.generic * token list) |
5826 | 12 |
val group: string -> (token list -> 'a) -> token list -> 'a |
13 |
val !!! : (token list -> 'a) -> token list -> 'a |
|
8581
5c7ed2af8bfb
!!!! = cut "Corrupted outer syntax in presentation";
wenzelm
parents:
8350
diff
changeset
|
14 |
val !!!! : (token list -> 'a) -> token list -> 'a |
12047 | 15 |
val triple1: ('a * 'b) * 'c -> 'a * 'b * 'c |
16 |
val triple2: 'a * ('b * 'c) -> 'a * 'b * 'c |
|
17 |
val triple_swap: ('a * 'b) * 'c -> ('a * 'c) * 'b |
|
29310 | 18 |
val not_eof: token parser |
5826 | 19 |
val position: (token list -> 'a * 'b) -> token list -> ('a * Position.T) * 'b |
29310 | 20 |
val command: string parser |
21 |
val keyword: string parser |
|
22 |
val short_ident: string parser |
|
23 |
val long_ident: string parser |
|
24 |
val sym_ident: string parser |
|
25 |
val minus: string parser |
|
26 |
val term_var: string parser |
|
27 |
val type_ident: string parser |
|
28 |
val type_var: string parser |
|
29 |
val number: string parser |
|
30 |
val string: string parser |
|
31 |
val alt_string: string parser |
|
32 |
val verbatim: string parser |
|
33 |
val sync: string parser |
|
34 |
val eof: string parser |
|
35 |
val keyword_with: (string -> bool) -> string parser |
|
36 |
val keyword_ident_or_symbolic: string parser |
|
37 |
val $$$ : string -> string parser |
|
38 |
val reserved: string -> string parser |
|
39 |
val semicolon: string parser |
|
40 |
val underscore: string parser |
|
41 |
val maybe: 'a parser -> 'a option parser |
|
42 |
val tag_name: string parser |
|
43 |
val tags: string list parser |
|
44 |
val opt_unit: unit parser |
|
45 |
val opt_keyword: string -> bool parser |
|
46 |
val begin: string parser |
|
47 |
val opt_begin: bool parser |
|
48 |
val nat: int parser |
|
49 |
val int: int parser |
|
50 |
val enum: string -> 'a parser -> 'a list parser |
|
51 |
val enum1: string -> 'a parser -> 'a list parser |
|
52 |
val and_list: 'a parser -> 'a list parser |
|
53 |
val and_list1: 'a parser -> 'a list parser |
|
30511 | 54 |
val enum': string -> 'a context_parser -> 'a list context_parser |
55 |
val enum1': string -> 'a context_parser -> 'a list context_parser |
|
56 |
val and_list': 'a context_parser -> 'a list context_parser |
|
57 |
val and_list1': 'a context_parser -> 'a list context_parser |
|
29310 | 58 |
val list: 'a parser -> 'a list parser |
59 |
val list1: 'a parser -> 'a list parser |
|
60 |
val name: bstring parser |
|
29581 | 61 |
val binding: binding parser |
29310 | 62 |
val xname: xstring parser |
63 |
val text: string parser |
|
64 |
val path: Path.T parser |
|
65 |
val parname: string parser |
|
29581 | 66 |
val parbinding: binding parser |
29310 | 67 |
val sort: string parser |
68 |
val arity: (string * string list * string) parser |
|
69 |
val multi_arity: (string list * string list * string) parser |
|
70 |
val type_args: string list parser |
|
71 |
val typ_group: string parser |
|
72 |
val typ: string parser |
|
73 |
val mixfix: mixfix parser |
|
74 |
val mixfix': mixfix parser |
|
75 |
val opt_infix: mixfix parser |
|
76 |
val opt_infix': mixfix parser |
|
77 |
val opt_mixfix: mixfix parser |
|
78 |
val opt_mixfix': mixfix parser |
|
79 |
val where_: string parser |
|
80 |
val const: (string * string * mixfix) parser |
|
30339 | 81 |
val const_binding: (binding * string * mixfix) parser |
29581 | 82 |
val params: (binding * string option) list parser |
83 |
val simple_fixes: (binding * string option) list parser |
|
84 |
val fixes: (binding * string option * mixfix) list parser |
|
85 |
val for_fixes: (binding * string option * mixfix) list parser |
|
86 |
val for_simple_fixes: (binding * string option) list parser |
|
29310 | 87 |
val ML_source: (SymbolPos.text * Position.T) parser |
88 |
val doc_source: (SymbolPos.text * Position.T) parser |
|
89 |
val term_group: string parser |
|
90 |
val prop_group: string parser |
|
91 |
val term: string parser |
|
92 |
val prop: string parser |
|
93 |
val propp: (string * string list) parser |
|
94 |
val termp: (string * string list) parser |
|
95 |
val target: xstring parser |
|
96 |
val opt_target: xstring option parser |
|
5826 | 97 |
end; |
98 |
||
99 |
structure OuterParse: OUTER_PARSE = |
|
100 |
struct |
|
101 |
||
9131 | 102 |
structure T = OuterLex; |
103 |
type token = T.token; |
|
5826 | 104 |
|
29310 | 105 |
type 'a parser = token list -> 'a * token list; |
30511 | 106 |
type 'a context_parser = Context.generic * token list -> 'a * (Context.generic * token list); |
29310 | 107 |
|
5826 | 108 |
|
109 |
(** error handling **) |
|
110 |
||
111 |
(* group atomic parsers (no cuts!) *) |
|
112 |
||
113 |
fun fail_with s = Scan.fail_with |
|
114 |
(fn [] => s ^ " expected (past end-of-file!)" |
|
23789
1993b865c5ac
replaced OuterLex.name_of by more sophisticated OuterLex.text_of;
wenzelm
parents:
22331
diff
changeset
|
115 |
| (tok :: _) => |
1993b865c5ac
replaced OuterLex.name_of by more sophisticated OuterLex.text_of;
wenzelm
parents:
22331
diff
changeset
|
116 |
(case T.text_of tok of |
1993b865c5ac
replaced OuterLex.name_of by more sophisticated OuterLex.text_of;
wenzelm
parents:
22331
diff
changeset
|
117 |
(txt, "") => s ^ " expected,\nbut " ^ txt ^ T.pos_of tok ^ " was found" |
1993b865c5ac
replaced OuterLex.name_of by more sophisticated OuterLex.text_of;
wenzelm
parents:
22331
diff
changeset
|
118 |
| (txt1, txt2) => s ^ " expected,\nbut " ^ txt1 ^ T.pos_of tok ^ " was found:\n" ^ txt2)); |
5826 | 119 |
|
120 |
fun group s scan = scan || fail_with s; |
|
121 |
||
122 |
||
5877 | 123 |
(* cut *) |
5826 | 124 |
|
8581
5c7ed2af8bfb
!!!! = cut "Corrupted outer syntax in presentation";
wenzelm
parents:
8350
diff
changeset
|
125 |
fun cut kind scan = |
5826 | 126 |
let |
127 |
fun get_pos [] = " (past end-of-file!)" |
|
9131 | 128 |
| get_pos (tok :: _) = T.pos_of tok; |
5826 | 129 |
|
15531 | 130 |
fun err (toks, NONE) = kind ^ get_pos toks |
25625 | 131 |
| err (toks, SOME msg) = |
132 |
if String.isPrefix kind msg then msg |
|
133 |
else kind ^ get_pos toks ^ ": " ^ msg; |
|
5826 | 134 |
in Scan.!! err scan end; |
135 |
||
8586 | 136 |
fun !!! scan = cut "Outer syntax error" scan; |
137 |
fun !!!! scan = cut "Corrupted outer syntax in presentation" scan; |
|
8581
5c7ed2af8bfb
!!!! = cut "Corrupted outer syntax in presentation";
wenzelm
parents:
8350
diff
changeset
|
138 |
|
5826 | 139 |
|
140 |
||
141 |
(** basic parsers **) |
|
142 |
||
143 |
(* utils *) |
|
144 |
||
145 |
fun triple1 ((x, y), z) = (x, y, z); |
|
146 |
fun triple2 (x, (y, z)) = (x, y, z); |
|
6430 | 147 |
fun triple_swap ((x, y), z) = ((x, z), y); |
5826 | 148 |
|
149 |
||
150 |
(* tokens *) |
|
151 |
||
27815 | 152 |
fun RESET_VALUE atom = (*required for all primitive parsers*) |
153 |
Scan.ahead (Scan.one (K true)) -- atom >> (fn (arg, x) => (T.assign NONE arg; x)); |
|
154 |
||
155 |
||
156 |
val not_eof = RESET_VALUE (Scan.one T.not_eof); |
|
15703 | 157 |
|
27753
94b672153b49
sort/typ/term/prop: inner_syntax markup encodes original source position;
wenzelm
parents:
27737
diff
changeset
|
158 |
fun position scan = (Scan.ahead not_eof >> T.position_of) -- scan >> Library.swap; |
27886 | 159 |
fun source_position atom = Scan.ahead atom |-- not_eof >> T.source_position_of; |
27815 | 160 |
fun inner_syntax atom = Scan.ahead atom |-- not_eof >> T.source_of; |
5826 | 161 |
|
162 |
fun kind k = |
|
27815 | 163 |
group (T.str_of_kind k) (RESET_VALUE (Scan.one (T.is_kind k) >> T.content_of)); |
5826 | 164 |
|
9131 | 165 |
val command = kind T.Command; |
166 |
val keyword = kind T.Keyword; |
|
167 |
val short_ident = kind T.Ident; |
|
168 |
val long_ident = kind T.LongIdent; |
|
169 |
val sym_ident = kind T.SymIdent; |
|
170 |
val term_var = kind T.Var; |
|
171 |
val type_ident = kind T.TypeIdent; |
|
172 |
val type_var = kind T.TypeVar; |
|
173 |
val number = kind T.Nat; |
|
174 |
val string = kind T.String; |
|
17165 | 175 |
val alt_string = kind T.AltString; |
9131 | 176 |
val verbatim = kind T.Verbatim; |
177 |
val sync = kind T.Sync; |
|
178 |
val eof = kind T.EOF; |
|
5826 | 179 |
|
27815 | 180 |
fun keyword_with pred = RESET_VALUE (Scan.one (T.keyword_with pred) >> T.content_of); |
181 |
val keyword_ident_or_symbolic = keyword_with T.ident_or_symbolic; |
|
182 |
||
5826 | 183 |
fun $$$ x = |
27815 | 184 |
group (T.str_of_kind T.Keyword ^ " " ^ quote x) (keyword_with (fn y => x = y)); |
9131 | 185 |
|
16030 | 186 |
fun reserved x = |
27753
94b672153b49
sort/typ/term/prop: inner_syntax markup encodes original source position;
wenzelm
parents:
27737
diff
changeset
|
187 |
group ("reserved identifier " ^ quote x) |
27815 | 188 |
(RESET_VALUE (Scan.one (T.ident_with (fn y => x = y)) >> T.content_of)); |
16030 | 189 |
|
9131 | 190 |
val semicolon = $$$ ";"; |
5826 | 191 |
|
15703 | 192 |
val minus = sym_ident :-- (fn "-" => Scan.succeed () | _ => Scan.fail) >> #1; |
11792
311eee3d63b6
parser for underscore (actually a symbolic identifier!);
wenzelm
parents:
11651
diff
changeset
|
193 |
val underscore = sym_ident :-- (fn "_" => Scan.succeed () | _ => Scan.fail) >> #1; |
15703 | 194 |
fun maybe scan = underscore >> K NONE || scan >> SOME; |
11792
311eee3d63b6
parser for underscore (actually a symbolic identifier!);
wenzelm
parents:
11651
diff
changeset
|
195 |
|
14835 | 196 |
val nat = number >> (#1 o Library.read_int o Symbol.explode); |
27815 | 197 |
val int = Scan.optional (minus >> K ~1) 1 -- nat >> op *; |
5826 | 198 |
|
17070 | 199 |
val tag_name = group "tag name" (short_ident || string); |
200 |
val tags = Scan.repeat ($$$ "%" |-- !!! tag_name); |
|
201 |
||
7930 | 202 |
val opt_unit = Scan.optional ($$$ "(" -- $$$ ")" >> (K ())) (); |
14646 | 203 |
fun opt_keyword s = Scan.optional ($$$ "(" |-- !!! (($$$ s >> K true) --| $$$ ")")) false; |
204 |
||
20983 | 205 |
val begin = $$$ "begin"; |
206 |
val opt_begin = Scan.optional (begin >> K true) false; |
|
20961 | 207 |
|
5826 | 208 |
|
209 |
(* enumerations *) |
|
210 |
||
25999 | 211 |
fun enum1 sep scan = scan ::: Scan.repeat ($$$ sep |-- !!! scan); |
5826 | 212 |
fun enum sep scan = enum1 sep scan || Scan.succeed []; |
213 |
||
27815 | 214 |
fun enum1' sep scan = scan ::: Scan.repeat (Scan.lift ($$$ sep) |-- scan); |
215 |
fun enum' sep scan = enum1' sep scan || Scan.succeed []; |
|
5826 | 216 |
|
6013 | 217 |
fun and_list1 scan = enum1 "and" scan; |
218 |
fun and_list scan = enum "and" scan; |
|
219 |
||
27815 | 220 |
fun and_list1' scan = enum1' "and" scan; |
221 |
fun and_list' scan = enum' "and" scan; |
|
222 |
||
223 |
fun list1 scan = enum1 "," scan; |
|
224 |
fun list scan = enum "," scan; |
|
225 |
||
5826 | 226 |
|
5960 | 227 |
(* names and text *) |
5826 | 228 |
|
8146 | 229 |
val name = group "name declaration" (short_ident || sym_ident || string || number); |
30223
24d975352879
renamed Binding.name_pos to Binding.make, renamed Binding.base_name to Binding.name_of, renamed Binding.map_base to Binding.map_name, added mandatory flag to Binding.qualify;
wenzelm
parents:
29581
diff
changeset
|
230 |
val binding = position name >> Binding.make; |
8146 | 231 |
val xname = group "name reference" (short_ident || long_ident || sym_ident || string || number); |
232 |
val text = group "text" (short_ident || long_ident || sym_ident || string || number || verbatim); |
|
21858
05f57309170c
avoid conflict with Alice keywords: renamed pack -> implode, unpack -> explode, any -> many, avoided assert;
wenzelm
parents:
21609
diff
changeset
|
233 |
val path = group "file name/path specification" name >> Path.explode; |
6553 | 234 |
|
18898 | 235 |
val parname = Scan.optional ($$$ "(" |-- name --| $$$ ")") ""; |
28965 | 236 |
val parbinding = Scan.optional ($$$ "(" |-- binding --| $$$ ")") Binding.empty; |
18898 | 237 |
|
6553 | 238 |
|
6372 | 239 |
(* sorts and arities *) |
5826 | 240 |
|
27753
94b672153b49
sort/typ/term/prop: inner_syntax markup encodes original source position;
wenzelm
parents:
27737
diff
changeset
|
241 |
val sort = group "sort" (inner_syntax xname); |
5826 | 242 |
|
22331 | 243 |
val arity = xname -- ($$$ "::" |-- !!! |
244 |
(Scan.optional ($$$ "(" |-- !!! (list1 sort --| $$$ ")")) [] -- sort)) >> triple2; |
|
5826 | 245 |
|
25541 | 246 |
val multi_arity = and_list1 xname -- ($$$ "::" |-- !!! |
247 |
(Scan.optional ($$$ "(" |-- !!! (list1 sort --| $$$ ")")) [] -- sort)) >> triple2; |
|
248 |
||
5826 | 249 |
|
250 |
(* types *) |
|
251 |
||
27753
94b672153b49
sort/typ/term/prop: inner_syntax markup encodes original source position;
wenzelm
parents:
27737
diff
changeset
|
252 |
val typ_group = group "type" |
8146 | 253 |
(short_ident || long_ident || sym_ident || type_ident || type_var || string || number); |
5826 | 254 |
|
27753
94b672153b49
sort/typ/term/prop: inner_syntax markup encodes original source position;
wenzelm
parents:
27737
diff
changeset
|
255 |
val typ = inner_syntax typ_group; |
94b672153b49
sort/typ/term/prop: inner_syntax markup encodes original source position;
wenzelm
parents:
27737
diff
changeset
|
256 |
|
5826 | 257 |
val type_args = |
258 |
type_ident >> single || |
|
259 |
$$$ "(" |-- !!! (list1 type_ident --| $$$ ")") || |
|
260 |
Scan.succeed []; |
|
261 |
||
262 |
||
263 |
(* mixfix annotations *) |
|
264 |
||
18669 | 265 |
val mfix = string -- |
266 |
!!! (Scan.optional ($$$ "[" |-- !!! (list nat --| $$$ "]")) [] -- |
|
267 |
Scan.optional nat Syntax.max_pri) >> (Mixfix o triple2); |
|
268 |
||
269 |
val infx = $$$ "infix" |-- !!! (nat >> Infix || string -- nat >> InfixName); |
|
270 |
val infxl = $$$ "infixl" |-- !!! (nat >> Infixl || string -- nat >> InfixlName); |
|
271 |
val infxr = $$$ "infixr" |-- !!! (nat >> Infixr || string -- nat >> InfixrName); |
|
5826 | 272 |
|
18669 | 273 |
val binder = $$$ "binder" |-- |
274 |
!!! (string -- ($$$ "[" |-- nat --| $$$ "]" -- nat || nat >> (fn n => (n, n)))) |
|
275 |
>> (Binder o triple2); |
|
276 |
||
277 |
fun annotation guard fix = $$$ "(" |-- guard (fix --| $$$ ")"); |
|
278 |
fun opt_annotation guard fix = Scan.optional (annotation guard fix) NoSyn; |
|
279 |
||
280 |
val mixfix = annotation !!! (mfix || binder || infxl || infxr || infx); |
|
21609 | 281 |
val mixfix' = annotation I (mfix || binder || infxl || infxr || infx); |
18669 | 282 |
val opt_infix = opt_annotation !!! (infxl || infxr || infx); |
21609 | 283 |
val opt_infix' = opt_annotation I (infxl || infxr || infx); |
18669 | 284 |
val opt_mixfix = opt_annotation !!! (mfix || binder || infxl || infxr || infx); |
285 |
val opt_mixfix' = opt_annotation I (mfix || binder || infxl || infxr || infx); |
|
5826 | 286 |
|
287 |
||
18669 | 288 |
(* fixes *) |
5826 | 289 |
|
21400 | 290 |
val where_ = $$$ "where"; |
291 |
||
18618 | 292 |
val const = name -- ($$$ "::" |-- !!! typ) -- opt_mixfix >> triple1; |
30339 | 293 |
val const_binding = binding -- ($$$ "::" |-- !!! typ) -- opt_mixfix >> triple1; |
18669 | 294 |
|
28081
d664b2c1dfe6
explicit type Name.binding for higher-specification elements;
wenzelm
parents:
28017
diff
changeset
|
295 |
val params = Scan.repeat1 binding -- Scan.option ($$$ "::" |-- !!! typ) |
18669 | 296 |
>> (fn (xs, T) => map (rpair T) xs); |
297 |
||
19482
9f11af8f7ef9
tuned basic list operators (flat, maps, map_filter);
wenzelm
parents:
19284
diff
changeset
|
298 |
val simple_fixes = and_list1 params >> flat; |
18669 | 299 |
|
300 |
val fixes = |
|
28081
d664b2c1dfe6
explicit type Name.binding for higher-specification elements;
wenzelm
parents:
28017
diff
changeset
|
301 |
and_list1 (binding -- Scan.option ($$$ "::" |-- typ) -- mixfix >> (single o triple1) || |
19482
9f11af8f7ef9
tuned basic list operators (flat, maps, map_filter);
wenzelm
parents:
19284
diff
changeset
|
302 |
params >> map Syntax.no_syn) >> flat; |
5826 | 303 |
|
19845 | 304 |
val for_fixes = Scan.optional ($$$ "for" |-- !!! fixes) []; |
21371 | 305 |
val for_simple_fixes = Scan.optional ($$$ "for" |-- !!! simple_fixes) []; |
19845 | 306 |
|
5826 | 307 |
|
27877 | 308 |
(* embedded source text *) |
27872
631371a02b8c
P.doc_source and P.ml_sorce for proper SymbolPos.text;
wenzelm
parents:
27815
diff
changeset
|
309 |
|
27877 | 310 |
val ML_source = source_position (group "ML source" text); |
27872
631371a02b8c
P.doc_source and P.ml_sorce for proper SymbolPos.text;
wenzelm
parents:
27815
diff
changeset
|
311 |
val doc_source = source_position (group "document source" text); |
631371a02b8c
P.doc_source and P.ml_sorce for proper SymbolPos.text;
wenzelm
parents:
27815
diff
changeset
|
312 |
|
631371a02b8c
P.doc_source and P.ml_sorce for proper SymbolPos.text;
wenzelm
parents:
27815
diff
changeset
|
313 |
|
5826 | 314 |
(* terms *) |
315 |
||
7477 | 316 |
val trm = short_ident || long_ident || sym_ident || term_var || number || string; |
5826 | 317 |
|
27753
94b672153b49
sort/typ/term/prop: inner_syntax markup encodes original source position;
wenzelm
parents:
27737
diff
changeset
|
318 |
val term_group = group "term" trm; |
94b672153b49
sort/typ/term/prop: inner_syntax markup encodes original source position;
wenzelm
parents:
27737
diff
changeset
|
319 |
val prop_group = group "proposition" trm; |
94b672153b49
sort/typ/term/prop: inner_syntax markup encodes original source position;
wenzelm
parents:
27737
diff
changeset
|
320 |
|
94b672153b49
sort/typ/term/prop: inner_syntax markup encodes original source position;
wenzelm
parents:
27737
diff
changeset
|
321 |
val term = inner_syntax term_group; |
94b672153b49
sort/typ/term/prop: inner_syntax markup encodes original source position;
wenzelm
parents:
27737
diff
changeset
|
322 |
val prop = inner_syntax prop_group; |
5826 | 323 |
|
324 |
||
6949 | 325 |
(* patterns *) |
6935 | 326 |
|
6949 | 327 |
val is_terms = Scan.repeat1 ($$$ "is" |-- term); |
6935 | 328 |
val is_props = Scan.repeat1 ($$$ "is" |-- prop); |
329 |
||
19585 | 330 |
val propp = prop -- Scan.optional ($$$ "(" |-- !!! (is_props --| $$$ ")")) []; |
6949 | 331 |
val termp = term -- Scan.optional ($$$ "(" |-- !!! (is_terms --| $$$ ")")) []; |
6935 | 332 |
|
333 |
||
22119 | 334 |
(* targets *) |
19811 | 335 |
|
22119 | 336 |
val target = ($$$ "(" -- $$$ "in") |-- !!! (xname --| $$$ ")"); |
337 |
val opt_target = Scan.option target; |
|
12272 | 338 |
|
339 |
end; |
|
30511 | 340 |
|
341 |
type 'a parser = 'a OuterParse.parser; |
|
342 |
type 'a context_parser = 'a OuterParse.context_parser; |
|
343 |