| author | wenzelm | 
| Thu, 30 Jul 2009 18:43:52 +0200 | |
| changeset 32285 | ab9b66c2bbca | 
| parent 30573 | 49899f26fbd1 | 
| child 35129 | ed24ba6f69aa | 
| permissions | -rw-r--r-- | 
| 5826 | 1 | (* Title: Pure/Isar/outer_parse.ML | 
| 2 | Author: Markus Wenzel, TU Muenchen | |
| 3 | ||
| 4 | Generic parsers for Isabelle/Isar outer syntax. | |
| 5 | *) | |
| 6 | ||
| 7 | signature OUTER_PARSE = | |
| 8 | sig | |
| 27886 | 9 | type token = OuterLex.token | 
| 29310 | 10 | type 'a parser = token list -> 'a * token list | 
| 30511 | 11 | type 'a context_parser = Context.generic * token list -> 'a * (Context.generic * token list) | 
| 5826 | 12 | val group: string -> (token list -> 'a) -> token list -> 'a | 
| 13 | val !!! : (token list -> 'a) -> token list -> 'a | |
| 8581 
5c7ed2af8bfb
!!!! = cut "Corrupted outer syntax in presentation";
 wenzelm parents: 
8350diff
changeset | 14 | val !!!! : (token list -> 'a) -> token list -> 'a | 
| 12047 | 15 |   val triple1: ('a * 'b) * 'c -> 'a * 'b * 'c
 | 
| 16 |   val triple2: 'a * ('b * 'c) -> 'a * 'b * 'c
 | |
| 17 |   val triple_swap: ('a * 'b) * 'c -> ('a * 'c) * 'b
 | |
| 29310 | 18 | val not_eof: token parser | 
| 5826 | 19 |   val position: (token list -> 'a * 'b) -> token list -> ('a * Position.T) * 'b
 | 
| 29310 | 20 | val command: string parser | 
| 21 | val keyword: string parser | |
| 22 | val short_ident: string parser | |
| 23 | val long_ident: string parser | |
| 24 | val sym_ident: string parser | |
| 25 | val minus: string parser | |
| 26 | val term_var: string parser | |
| 27 | val type_ident: string parser | |
| 28 | val type_var: string parser | |
| 29 | val number: string parser | |
| 30 | val string: string parser | |
| 31 | val alt_string: string parser | |
| 32 | val verbatim: string parser | |
| 33 | val sync: string parser | |
| 34 | val eof: string parser | |
| 35 | val keyword_with: (string -> bool) -> string parser | |
| 36 | val keyword_ident_or_symbolic: string parser | |
| 37 | val $$$ : string -> string parser | |
| 38 | val reserved: string -> string parser | |
| 39 | val semicolon: string parser | |
| 40 | val underscore: string parser | |
| 41 | val maybe: 'a parser -> 'a option parser | |
| 42 | val tag_name: string parser | |
| 43 | val tags: string list parser | |
| 44 | val opt_unit: unit parser | |
| 45 | val opt_keyword: string -> bool parser | |
| 46 | val begin: string parser | |
| 47 | val opt_begin: bool parser | |
| 48 | val nat: int parser | |
| 49 | val int: int parser | |
| 50 | val enum: string -> 'a parser -> 'a list parser | |
| 51 | val enum1: string -> 'a parser -> 'a list parser | |
| 52 | val and_list: 'a parser -> 'a list parser | |
| 53 | val and_list1: 'a parser -> 'a list parser | |
| 30511 | 54 | val enum': string -> 'a context_parser -> 'a list context_parser | 
| 55 | val enum1': string -> 'a context_parser -> 'a list context_parser | |
| 56 | val and_list': 'a context_parser -> 'a list context_parser | |
| 57 | val and_list1': 'a context_parser -> 'a list context_parser | |
| 29310 | 58 | val list: 'a parser -> 'a list parser | 
| 59 | val list1: 'a parser -> 'a list parser | |
| 60 | val name: bstring parser | |
| 29581 | 61 | val binding: binding parser | 
| 29310 | 62 | val xname: xstring parser | 
| 63 | val text: string parser | |
| 64 | val path: Path.T parser | |
| 65 | val parname: string parser | |
| 29581 | 66 | val parbinding: binding parser | 
| 29310 | 67 | val sort: string parser | 
| 68 | val arity: (string * string list * string) parser | |
| 69 | val multi_arity: (string list * string list * string) parser | |
| 70 | val type_args: string list parser | |
| 71 | val typ_group: string parser | |
| 72 | val typ: string parser | |
| 73 | val mixfix: mixfix parser | |
| 74 | val mixfix': mixfix parser | |
| 75 | val opt_infix: mixfix parser | |
| 76 | val opt_infix': mixfix parser | |
| 77 | val opt_mixfix: mixfix parser | |
| 78 | val opt_mixfix': mixfix parser | |
| 79 | val where_: string parser | |
| 80 | val const: (string * string * mixfix) parser | |
| 30339 | 81 | val const_binding: (binding * string * mixfix) parser | 
| 29581 | 82 | val params: (binding * string option) list parser | 
| 83 | val simple_fixes: (binding * string option) list parser | |
| 84 | val fixes: (binding * string option * mixfix) list parser | |
| 85 | val for_fixes: (binding * string option * mixfix) list parser | |
| 86 | val for_simple_fixes: (binding * string option) list parser | |
| 30573 | 87 | val ML_source: (Symbol_Pos.text * Position.T) parser | 
| 88 | val doc_source: (Symbol_Pos.text * Position.T) parser | |
| 29310 | 89 | val term_group: string parser | 
| 90 | val prop_group: string parser | |
| 91 | val term: string parser | |
| 92 | val prop: string parser | |
| 93 | val propp: (string * string list) parser | |
| 94 | val termp: (string * string list) parser | |
| 95 | val target: xstring parser | |
| 96 | val opt_target: xstring option parser | |
| 5826 | 97 | end; | 
| 98 | ||
| 99 | structure OuterParse: OUTER_PARSE = | |
| 100 | struct | |
| 101 | ||
| 9131 | 102 | structure T = OuterLex; | 
| 103 | type token = T.token; | |
| 5826 | 104 | |
| 29310 | 105 | type 'a parser = token list -> 'a * token list; | 
| 30511 | 106 | type 'a context_parser = Context.generic * token list -> 'a * (Context.generic * token list); | 
| 29310 | 107 | |
| 5826 | 108 | |
| 109 | (** error handling **) | |
| 110 | ||
| 111 | (* group atomic parsers (no cuts!) *) | |
| 112 | ||
| 113 | fun fail_with s = Scan.fail_with | |
| 114 | (fn [] => s ^ " expected (past end-of-file!)" | |
| 23789 
1993b865c5ac
replaced OuterLex.name_of by more sophisticated OuterLex.text_of;
 wenzelm parents: 
22331diff
changeset | 115 | | (tok :: _) => | 
| 
1993b865c5ac
replaced OuterLex.name_of by more sophisticated OuterLex.text_of;
 wenzelm parents: 
22331diff
changeset | 116 | (case T.text_of tok of | 
| 
1993b865c5ac
replaced OuterLex.name_of by more sophisticated OuterLex.text_of;
 wenzelm parents: 
22331diff
changeset | 117 | (txt, "") => s ^ " expected,\nbut " ^ txt ^ T.pos_of tok ^ " was found" | 
| 
1993b865c5ac
replaced OuterLex.name_of by more sophisticated OuterLex.text_of;
 wenzelm parents: 
22331diff
changeset | 118 | | (txt1, txt2) => s ^ " expected,\nbut " ^ txt1 ^ T.pos_of tok ^ " was found:\n" ^ txt2)); | 
| 5826 | 119 | |
| 120 | fun group s scan = scan || fail_with s; | |
| 121 | ||
| 122 | ||
| 5877 | 123 | (* cut *) | 
| 5826 | 124 | |
| 8581 
5c7ed2af8bfb
!!!! = cut "Corrupted outer syntax in presentation";
 wenzelm parents: 
8350diff
changeset | 125 | fun cut kind scan = | 
| 5826 | 126 | let | 
| 127 | fun get_pos [] = " (past end-of-file!)" | |
| 9131 | 128 | | get_pos (tok :: _) = T.pos_of tok; | 
| 5826 | 129 | |
| 15531 | 130 | fun err (toks, NONE) = kind ^ get_pos toks | 
| 25625 | 131 | | err (toks, SOME msg) = | 
| 132 | if String.isPrefix kind msg then msg | |
| 133 | else kind ^ get_pos toks ^ ": " ^ msg; | |
| 5826 | 134 | in Scan.!! err scan end; | 
| 135 | ||
| 8586 | 136 | fun !!! scan = cut "Outer syntax error" scan; | 
| 137 | fun !!!! scan = cut "Corrupted outer syntax in presentation" scan; | |
| 8581 
5c7ed2af8bfb
!!!! = cut "Corrupted outer syntax in presentation";
 wenzelm parents: 
8350diff
changeset | 138 | |
| 5826 | 139 | |
| 140 | ||
| 141 | (** basic parsers **) | |
| 142 | ||
| 143 | (* utils *) | |
| 144 | ||
| 145 | fun triple1 ((x, y), z) = (x, y, z); | |
| 146 | fun triple2 (x, (y, z)) = (x, y, z); | |
| 6430 | 147 | fun triple_swap ((x, y), z) = ((x, z), y); | 
| 5826 | 148 | |
| 149 | ||
| 150 | (* tokens *) | |
| 151 | ||
| 27815 | 152 | fun RESET_VALUE atom = (*required for all primitive parsers*) | 
| 153 | Scan.ahead (Scan.one (K true)) -- atom >> (fn (arg, x) => (T.assign NONE arg; x)); | |
| 154 | ||
| 155 | ||
| 156 | val not_eof = RESET_VALUE (Scan.one T.not_eof); | |
| 15703 | 157 | |
| 27753 
94b672153b49
sort/typ/term/prop: inner_syntax markup encodes original source position;
 wenzelm parents: 
27737diff
changeset | 158 | fun position scan = (Scan.ahead not_eof >> T.position_of) -- scan >> Library.swap; | 
| 27886 | 159 | fun source_position atom = Scan.ahead atom |-- not_eof >> T.source_position_of; | 
| 27815 | 160 | fun inner_syntax atom = Scan.ahead atom |-- not_eof >> T.source_of; | 
| 5826 | 161 | |
| 162 | fun kind k = | |
| 27815 | 163 | group (T.str_of_kind k) (RESET_VALUE (Scan.one (T.is_kind k) >> T.content_of)); | 
| 5826 | 164 | |
| 9131 | 165 | val command = kind T.Command; | 
| 166 | val keyword = kind T.Keyword; | |
| 167 | val short_ident = kind T.Ident; | |
| 168 | val long_ident = kind T.LongIdent; | |
| 169 | val sym_ident = kind T.SymIdent; | |
| 170 | val term_var = kind T.Var; | |
| 171 | val type_ident = kind T.TypeIdent; | |
| 172 | val type_var = kind T.TypeVar; | |
| 173 | val number = kind T.Nat; | |
| 174 | val string = kind T.String; | |
| 17165 | 175 | val alt_string = kind T.AltString; | 
| 9131 | 176 | val verbatim = kind T.Verbatim; | 
| 177 | val sync = kind T.Sync; | |
| 178 | val eof = kind T.EOF; | |
| 5826 | 179 | |
| 27815 | 180 | fun keyword_with pred = RESET_VALUE (Scan.one (T.keyword_with pred) >> T.content_of); | 
| 181 | val keyword_ident_or_symbolic = keyword_with T.ident_or_symbolic; | |
| 182 | ||
| 5826 | 183 | fun $$$ x = | 
| 27815 | 184 | group (T.str_of_kind T.Keyword ^ " " ^ quote x) (keyword_with (fn y => x = y)); | 
| 9131 | 185 | |
| 16030 | 186 | fun reserved x = | 
| 27753 
94b672153b49
sort/typ/term/prop: inner_syntax markup encodes original source position;
 wenzelm parents: 
27737diff
changeset | 187 |   group ("reserved identifier " ^ quote x)
 | 
| 27815 | 188 | (RESET_VALUE (Scan.one (T.ident_with (fn y => x = y)) >> T.content_of)); | 
| 16030 | 189 | |
| 9131 | 190 | val semicolon = $$$ ";"; | 
| 5826 | 191 | |
| 15703 | 192 | val minus = sym_ident :-- (fn "-" => Scan.succeed () | _ => Scan.fail) >> #1; | 
| 11792 
311eee3d63b6
parser for underscore (actually a symbolic identifier!);
 wenzelm parents: 
11651diff
changeset | 193 | val underscore = sym_ident :-- (fn "_" => Scan.succeed () | _ => Scan.fail) >> #1; | 
| 15703 | 194 | fun maybe scan = underscore >> K NONE || scan >> SOME; | 
| 11792 
311eee3d63b6
parser for underscore (actually a symbolic identifier!);
 wenzelm parents: 
11651diff
changeset | 195 | |
| 14835 | 196 | val nat = number >> (#1 o Library.read_int o Symbol.explode); | 
| 27815 | 197 | val int = Scan.optional (minus >> K ~1) 1 -- nat >> op *; | 
| 5826 | 198 | |
| 17070 | 199 | val tag_name = group "tag name" (short_ident || string); | 
| 200 | val tags = Scan.repeat ($$$ "%" |-- !!! tag_name); | |
| 201 | ||
| 7930 | 202 | val opt_unit = Scan.optional ($$$ "(" -- $$$ ")" >> (K ())) ();
 | 
| 14646 | 203 | fun opt_keyword s = Scan.optional ($$$ "(" |-- !!! (($$$ s >> K true) --| $$$ ")")) false;
 | 
| 204 | ||
| 20983 | 205 | val begin = $$$ "begin"; | 
| 206 | val opt_begin = Scan.optional (begin >> K true) false; | |
| 20961 | 207 | |
| 5826 | 208 | |
| 209 | (* enumerations *) | |
| 210 | ||
| 25999 | 211 | fun enum1 sep scan = scan ::: Scan.repeat ($$$ sep |-- !!! scan); | 
| 5826 | 212 | fun enum sep scan = enum1 sep scan || Scan.succeed []; | 
| 213 | ||
| 27815 | 214 | fun enum1' sep scan = scan ::: Scan.repeat (Scan.lift ($$$ sep) |-- scan); | 
| 215 | fun enum' sep scan = enum1' sep scan || Scan.succeed []; | |
| 5826 | 216 | |
| 6013 | 217 | fun and_list1 scan = enum1 "and" scan; | 
| 218 | fun and_list scan = enum "and" scan; | |
| 219 | ||
| 27815 | 220 | fun and_list1' scan = enum1' "and" scan; | 
| 221 | fun and_list' scan = enum' "and" scan; | |
| 222 | ||
| 223 | fun list1 scan = enum1 "," scan; | |
| 224 | fun list scan = enum "," scan; | |
| 225 | ||
| 5826 | 226 | |
| 5960 | 227 | (* names and text *) | 
| 5826 | 228 | |
| 8146 | 229 | val name = group "name declaration" (short_ident || sym_ident || string || number); | 
| 30223 
24d975352879
renamed Binding.name_pos to Binding.make, renamed Binding.base_name to Binding.name_of, renamed Binding.map_base to Binding.map_name, added mandatory flag to Binding.qualify;
 wenzelm parents: 
29581diff
changeset | 230 | val binding = position name >> Binding.make; | 
| 8146 | 231 | val xname = group "name reference" (short_ident || long_ident || sym_ident || string || number); | 
| 232 | val text = group "text" (short_ident || long_ident || sym_ident || string || number || verbatim); | |
| 21858 
05f57309170c
avoid conflict with Alice keywords: renamed pack -> implode, unpack -> explode, any -> many, avoided assert;
 wenzelm parents: 
21609diff
changeset | 233 | val path = group "file name/path specification" name >> Path.explode; | 
| 6553 | 234 | |
| 18898 | 235 | val parname = Scan.optional ($$$ "(" |-- name --| $$$ ")") "";
 | 
| 28965 | 236 | val parbinding = Scan.optional ($$$ "(" |-- binding --| $$$ ")") Binding.empty;
 | 
| 18898 | 237 | |
| 6553 | 238 | |
| 6372 | 239 | (* sorts and arities *) | 
| 5826 | 240 | |
| 27753 
94b672153b49
sort/typ/term/prop: inner_syntax markup encodes original source position;
 wenzelm parents: 
27737diff
changeset | 241 | val sort = group "sort" (inner_syntax xname); | 
| 5826 | 242 | |
| 22331 | 243 | val arity = xname -- ($$$ "::" |-- !!! | 
| 244 |   (Scan.optional ($$$ "(" |-- !!! (list1 sort --| $$$ ")")) [] -- sort)) >> triple2;
 | |
| 5826 | 245 | |
| 25541 | 246 | val multi_arity = and_list1 xname -- ($$$ "::" |-- !!! | 
| 247 |   (Scan.optional ($$$ "(" |-- !!! (list1 sort --| $$$ ")")) [] -- sort)) >> triple2;
 | |
| 248 | ||
| 5826 | 249 | |
| 250 | (* types *) | |
| 251 | ||
| 27753 
94b672153b49
sort/typ/term/prop: inner_syntax markup encodes original source position;
 wenzelm parents: 
27737diff
changeset | 252 | val typ_group = group "type" | 
| 8146 | 253 | (short_ident || long_ident || sym_ident || type_ident || type_var || string || number); | 
| 5826 | 254 | |
| 27753 
94b672153b49
sort/typ/term/prop: inner_syntax markup encodes original source position;
 wenzelm parents: 
27737diff
changeset | 255 | val typ = inner_syntax typ_group; | 
| 
94b672153b49
sort/typ/term/prop: inner_syntax markup encodes original source position;
 wenzelm parents: 
27737diff
changeset | 256 | |
| 5826 | 257 | val type_args = | 
| 258 | type_ident >> single || | |
| 259 |   $$$ "(" |-- !!! (list1 type_ident --| $$$ ")") ||
 | |
| 260 | Scan.succeed []; | |
| 261 | ||
| 262 | ||
| 263 | (* mixfix annotations *) | |
| 264 | ||
| 18669 | 265 | val mfix = string -- | 
| 266 | !!! (Scan.optional ($$$ "[" |-- !!! (list nat --| $$$ "]")) [] -- | |
| 267 | Scan.optional nat Syntax.max_pri) >> (Mixfix o triple2); | |
| 268 | ||
| 269 | val infx = $$$ "infix" |-- !!! (nat >> Infix || string -- nat >> InfixName); | |
| 270 | val infxl = $$$ "infixl" |-- !!! (nat >> Infixl || string -- nat >> InfixlName); | |
| 271 | val infxr = $$$ "infixr" |-- !!! (nat >> Infixr || string -- nat >> InfixrName); | |
| 5826 | 272 | |
| 18669 | 273 | val binder = $$$ "binder" |-- | 
| 274 | !!! (string -- ($$$ "[" |-- nat --| $$$ "]" -- nat || nat >> (fn n => (n, n)))) | |
| 275 | >> (Binder o triple2); | |
| 276 | ||
| 277 | fun annotation guard fix = $$$ "(" |-- guard (fix --| $$$ ")");
 | |
| 278 | fun opt_annotation guard fix = Scan.optional (annotation guard fix) NoSyn; | |
| 279 | ||
| 280 | val mixfix = annotation !!! (mfix || binder || infxl || infxr || infx); | |
| 21609 | 281 | val mixfix' = annotation I (mfix || binder || infxl || infxr || infx); | 
| 18669 | 282 | val opt_infix = opt_annotation !!! (infxl || infxr || infx); | 
| 21609 | 283 | val opt_infix' = opt_annotation I (infxl || infxr || infx); | 
| 18669 | 284 | val opt_mixfix = opt_annotation !!! (mfix || binder || infxl || infxr || infx); | 
| 285 | val opt_mixfix' = opt_annotation I (mfix || binder || infxl || infxr || infx); | |
| 5826 | 286 | |
| 287 | ||
| 18669 | 288 | (* fixes *) | 
| 5826 | 289 | |
| 21400 | 290 | val where_ = $$$ "where"; | 
| 291 | ||
| 18618 | 292 | val const = name -- ($$$ "::" |-- !!! typ) -- opt_mixfix >> triple1; | 
| 30339 | 293 | val const_binding = binding -- ($$$ "::" |-- !!! typ) -- opt_mixfix >> triple1; | 
| 18669 | 294 | |
| 28081 
d664b2c1dfe6
explicit type Name.binding for higher-specification elements;
 wenzelm parents: 
28017diff
changeset | 295 | val params = Scan.repeat1 binding -- Scan.option ($$$ "::" |-- !!! typ) | 
| 18669 | 296 | >> (fn (xs, T) => map (rpair T) xs); | 
| 297 | ||
| 19482 
9f11af8f7ef9
tuned basic list operators (flat, maps, map_filter);
 wenzelm parents: 
19284diff
changeset | 298 | val simple_fixes = and_list1 params >> flat; | 
| 18669 | 299 | |
| 300 | val fixes = | |
| 28081 
d664b2c1dfe6
explicit type Name.binding for higher-specification elements;
 wenzelm parents: 
28017diff
changeset | 301 | and_list1 (binding -- Scan.option ($$$ "::" |-- typ) -- mixfix >> (single o triple1) || | 
| 19482 
9f11af8f7ef9
tuned basic list operators (flat, maps, map_filter);
 wenzelm parents: 
19284diff
changeset | 302 | params >> map Syntax.no_syn) >> flat; | 
| 5826 | 303 | |
| 19845 | 304 | val for_fixes = Scan.optional ($$$ "for" |-- !!! fixes) []; | 
| 21371 | 305 | val for_simple_fixes = Scan.optional ($$$ "for" |-- !!! simple_fixes) []; | 
| 19845 | 306 | |
| 5826 | 307 | |
| 27877 | 308 | (* embedded source text *) | 
| 27872 
631371a02b8c
P.doc_source and P.ml_sorce for proper SymbolPos.text;
 wenzelm parents: 
27815diff
changeset | 309 | |
| 27877 | 310 | val ML_source = source_position (group "ML source" text); | 
| 27872 
631371a02b8c
P.doc_source and P.ml_sorce for proper SymbolPos.text;
 wenzelm parents: 
27815diff
changeset | 311 | val doc_source = source_position (group "document source" text); | 
| 
631371a02b8c
P.doc_source and P.ml_sorce for proper SymbolPos.text;
 wenzelm parents: 
27815diff
changeset | 312 | |
| 
631371a02b8c
P.doc_source and P.ml_sorce for proper SymbolPos.text;
 wenzelm parents: 
27815diff
changeset | 313 | |
| 5826 | 314 | (* terms *) | 
| 315 | ||
| 7477 | 316 | val trm = short_ident || long_ident || sym_ident || term_var || number || string; | 
| 5826 | 317 | |
| 27753 
94b672153b49
sort/typ/term/prop: inner_syntax markup encodes original source position;
 wenzelm parents: 
27737diff
changeset | 318 | val term_group = group "term" trm; | 
| 
94b672153b49
sort/typ/term/prop: inner_syntax markup encodes original source position;
 wenzelm parents: 
27737diff
changeset | 319 | val prop_group = group "proposition" trm; | 
| 
94b672153b49
sort/typ/term/prop: inner_syntax markup encodes original source position;
 wenzelm parents: 
27737diff
changeset | 320 | |
| 
94b672153b49
sort/typ/term/prop: inner_syntax markup encodes original source position;
 wenzelm parents: 
27737diff
changeset | 321 | val term = inner_syntax term_group; | 
| 
94b672153b49
sort/typ/term/prop: inner_syntax markup encodes original source position;
 wenzelm parents: 
27737diff
changeset | 322 | val prop = inner_syntax prop_group; | 
| 5826 | 323 | |
| 324 | ||
| 6949 | 325 | (* patterns *) | 
| 6935 | 326 | |
| 6949 | 327 | val is_terms = Scan.repeat1 ($$$ "is" |-- term); | 
| 6935 | 328 | val is_props = Scan.repeat1 ($$$ "is" |-- prop); | 
| 329 | ||
| 19585 | 330 | val propp = prop -- Scan.optional ($$$ "(" |-- !!! (is_props --| $$$ ")")) [];
 | 
| 6949 | 331 | val termp = term -- Scan.optional ($$$ "(" |-- !!! (is_terms --| $$$ ")")) [];
 | 
| 6935 | 332 | |
| 333 | ||
| 22119 | 334 | (* targets *) | 
| 19811 | 335 | |
| 22119 | 336 | val target = ($$$ "(" -- $$$ "in") |-- !!! (xname --| $$$ ")");
 | 
| 337 | val opt_target = Scan.option target; | |
| 12272 | 338 | |
| 339 | end; | |
| 30511 | 340 | |
| 341 | type 'a parser = 'a OuterParse.parser; | |
| 342 | type 'a context_parser = 'a OuterParse.context_parser; | |
| 343 |