| author | wenzelm | 
| Wed, 10 Oct 2007 17:32:00 +0200 | |
| changeset 24952 | f336c36f41a0 | 
| parent 24910 | 53b20f786a5e | 
| child 25541 | 68de88c7e877 | 
| permissions | -rw-r--r-- | 
| 5826 | 1 | (* Title: Pure/Isar/outer_parse.ML | 
| 2 | ID: $Id$ | |
| 3 | Author: Markus Wenzel, TU Muenchen | |
| 4 | ||
| 5 | Generic parsers for Isabelle/Isar outer syntax. | |
| 6 | *) | |
| 7 | ||
| 8 | signature OUTER_PARSE = | |
| 9 | sig | |
| 10 | type token | |
| 11 | val group: string -> (token list -> 'a) -> token list -> 'a | |
| 12 | val !!! : (token list -> 'a) -> token list -> 'a | |
| 8581 
5c7ed2af8bfb
!!!! = cut "Corrupted outer syntax in presentation";
 wenzelm parents: 
8350diff
changeset | 13 | val !!!! : (token list -> 'a) -> token list -> 'a | 
| 12047 | 14 |   val triple1: ('a * 'b) * 'c -> 'a * 'b * 'c
 | 
| 15 |   val triple2: 'a * ('b * 'c) -> 'a * 'b * 'c
 | |
| 16 |   val triple_swap: ('a * 'b) * 'c -> ('a * 'c) * 'b
 | |
| 15703 | 17 | val not_eof: token list -> token * token list | 
| 5826 | 18 |   val position: (token list -> 'a * 'b) -> token list -> ('a * Position.T) * 'b
 | 
| 7026 | 19 | val command: token list -> string * token list | 
| 5826 | 20 | val keyword: token list -> string * token list | 
| 21 | val short_ident: token list -> string * token list | |
| 22 | val long_ident: token list -> string * token list | |
| 23 | val sym_ident: token list -> string * token list | |
| 15963 | 24 | val minus: token list -> string * token list | 
| 5826 | 25 | val term_var: token list -> string * token list | 
| 26 | val type_ident: token list -> string * token list | |
| 27 | val type_var: token list -> string * token list | |
| 28 | val number: token list -> string * token list | |
| 29 | val string: token list -> string * token list | |
| 22119 | 30 | val alt_string: token list -> string * token list | 
| 5826 | 31 | val verbatim: token list -> string * token list | 
| 6860 | 32 | val sync: token list -> string * token list | 
| 5826 | 33 | val eof: token list -> string * token list | 
| 15703 | 34 | val $$$ : string -> token list -> string * token list | 
| 16030 | 35 | val reserved : string -> token list -> string * token list | 
| 15703 | 36 | val semicolon: token list -> string * token list | 
| 37 | val underscore: token list -> string * token list | |
| 38 | val maybe: (token list -> 'a * token list) -> token list -> 'a option * token list | |
| 17070 | 39 | val tag_name: token list -> string * token list | 
| 40 | val tags: token list -> string list * token list | |
| 7930 | 41 | val opt_unit: token list -> unit * token list | 
| 14646 | 42 | val opt_keyword: string -> token list -> bool * token list | 
| 20983 | 43 | val begin: token list -> string * token list | 
| 20961 | 44 | val opt_begin: token list -> bool * token list | 
| 5826 | 45 | val nat: token list -> int * token list | 
| 46 | val enum: string -> (token list -> 'a * token list) -> token list -> 'a list * token list | |
| 47 | val enum1: string -> (token list -> 'a * token list) -> token list -> 'a list * token list | |
| 48 | val list: (token list -> 'a * token list) -> token list -> 'a list * token list | |
| 49 | val list1: (token list -> 'a * token list) -> token list -> 'a list * token list | |
| 6013 | 50 | val and_list: (token list -> 'a * token list) -> token list -> 'a list * token list | 
| 51 | val and_list1: (token list -> 'a * token list) -> token list -> 'a list * token list | |
| 5826 | 52 | val name: token list -> bstring * token list | 
| 53 | val xname: token list -> xstring * token list | |
| 54 | val text: token list -> string * token list | |
| 14949 | 55 | val path: token list -> Path.T * token list | 
| 18898 | 56 | val parname: token list -> string * token list | 
| 8897 | 57 | val sort: token list -> string * token list | 
| 22331 | 58 | val arity: token list -> (string * string list * string) * token list | 
| 5826 | 59 | val type_args: token list -> string list * token list | 
| 60 | val typ: token list -> string * token list | |
| 18669 | 61 | val mixfix: token list -> mixfix * token list | 
| 21609 | 62 | val mixfix': token list -> mixfix * token list | 
| 18669 | 63 | val opt_infix: token list -> mixfix * token list | 
| 21609 | 64 | val opt_infix': token list -> mixfix * token list | 
| 18669 | 65 | val opt_mixfix: token list -> mixfix * token list | 
| 66 | val opt_mixfix': token list -> mixfix * token list | |
| 21400 | 67 | val where_: token list -> string * token list | 
| 18669 | 68 | val const: token list -> (string * string * mixfix) * token list | 
| 22119 | 69 | val params: token list -> (string * string option) list * token list | 
| 19845 | 70 | val simple_fixes: token list -> (string * string option) list * token list | 
| 18669 | 71 | val fixes: token list -> (string * string option * mixfix) list * token list | 
| 19845 | 72 | val for_fixes: token list -> (string * string option * mixfix) list * token list | 
| 21371 | 73 | val for_simple_fixes: token list -> (string * string option) list * token list | 
| 5826 | 74 | val term: token list -> string * token list | 
| 75 | val prop: token list -> string * token list | |
| 19585 | 76 | val propp: token list -> (string * string list) * token list | 
| 6949 | 77 | val termp: token list -> (string * string list) * token list | 
| 22119 | 78 | val keyword_sid: token list -> string * token list | 
| 79 | val args: (string -> bool) -> bool -> token list -> Args.T list * token list | |
| 80 | val args1: (string -> bool) -> bool -> token list -> Args.T list * token list | |
| 9131 | 81 | val arguments: token list -> Args.T list * token list | 
| 22119 | 82 | val target: token list -> xstring * token list | 
| 83 | val opt_target: token list -> xstring option * token list | |
| 5826 | 84 | end; | 
| 85 | ||
| 86 | structure OuterParse: OUTER_PARSE = | |
| 87 | struct | |
| 88 | ||
| 9131 | 89 | structure T = OuterLex; | 
| 90 | type token = T.token; | |
| 5826 | 91 | |
| 92 | ||
| 93 | (** error handling **) | |
| 94 | ||
| 95 | (* group atomic parsers (no cuts!) *) | |
| 96 | ||
| 97 | fun fail_with s = Scan.fail_with | |
| 98 | (fn [] => s ^ " expected (past end-of-file!)" | |
| 23789 
1993b865c5ac
replaced OuterLex.name_of by more sophisticated OuterLex.text_of;
 wenzelm parents: 
22331diff
changeset | 99 | | (tok :: _) => | 
| 
1993b865c5ac
replaced OuterLex.name_of by more sophisticated OuterLex.text_of;
 wenzelm parents: 
22331diff
changeset | 100 | (case T.text_of tok of | 
| 
1993b865c5ac
replaced OuterLex.name_of by more sophisticated OuterLex.text_of;
 wenzelm parents: 
22331diff
changeset | 101 | (txt, "") => s ^ " expected,\nbut " ^ txt ^ T.pos_of tok ^ " was found" | 
| 
1993b865c5ac
replaced OuterLex.name_of by more sophisticated OuterLex.text_of;
 wenzelm parents: 
22331diff
changeset | 102 | | (txt1, txt2) => s ^ " expected,\nbut " ^ txt1 ^ T.pos_of tok ^ " was found:\n" ^ txt2)); | 
| 5826 | 103 | |
| 104 | fun group s scan = scan || fail_with s; | |
| 105 | ||
| 106 | ||
| 5877 | 107 | (* cut *) | 
| 5826 | 108 | |
| 8581 
5c7ed2af8bfb
!!!! = cut "Corrupted outer syntax in presentation";
 wenzelm parents: 
8350diff
changeset | 109 | fun cut kind scan = | 
| 5826 | 110 | let | 
| 111 | fun get_pos [] = " (past end-of-file!)" | |
| 9131 | 112 | | get_pos (tok :: _) = T.pos_of tok; | 
| 5826 | 113 | |
| 15531 | 114 | fun err (toks, NONE) = kind ^ get_pos toks | 
| 115 | | err (toks, SOME msg) = kind ^ get_pos toks ^ ": " ^ msg; | |
| 5826 | 116 | in Scan.!! err scan end; | 
| 117 | ||
| 8586 | 118 | fun !!! scan = cut "Outer syntax error" scan; | 
| 119 | fun !!!! scan = cut "Corrupted outer syntax in presentation" scan; | |
| 8581 
5c7ed2af8bfb
!!!! = cut "Corrupted outer syntax in presentation";
 wenzelm parents: 
8350diff
changeset | 120 | |
| 5826 | 121 | |
| 122 | ||
| 123 | (** basic parsers **) | |
| 124 | ||
| 125 | (* utils *) | |
| 126 | ||
| 127 | fun triple1 ((x, y), z) = (x, y, z); | |
| 128 | fun triple2 (x, (y, z)) = (x, y, z); | |
| 6430 | 129 | fun triple_swap ((x, y), z) = ((x, z), y); | 
| 5826 | 130 | |
| 131 | ||
| 132 | (* tokens *) | |
| 133 | ||
| 15703 | 134 | val not_eof = Scan.one T.not_eof; | 
| 135 | ||
| 136 | fun position scan = (Scan.ahead not_eof >> T.position_of) -- scan >> Library.swap; | |
| 5826 | 137 | |
| 138 | fun kind k = | |
| 9131 | 139 | group (T.str_of_kind k) (Scan.one (T.is_kind k) >> T.val_of); | 
| 5826 | 140 | |
| 9131 | 141 | val command = kind T.Command; | 
| 142 | val keyword = kind T.Keyword; | |
| 143 | val short_ident = kind T.Ident; | |
| 144 | val long_ident = kind T.LongIdent; | |
| 145 | val sym_ident = kind T.SymIdent; | |
| 146 | val term_var = kind T.Var; | |
| 147 | val type_ident = kind T.TypeIdent; | |
| 148 | val type_var = kind T.TypeVar; | |
| 149 | val number = kind T.Nat; | |
| 150 | val string = kind T.String; | |
| 17165 | 151 | val alt_string = kind T.AltString; | 
| 9131 | 152 | val verbatim = kind T.Verbatim; | 
| 153 | val sync = kind T.Sync; | |
| 154 | val eof = kind T.EOF; | |
| 5826 | 155 | |
| 156 | fun $$$ x = | |
| 9131 | 157 | group (T.str_of_kind T.Keyword ^ " " ^ quote x) | 
| 158 | (Scan.one (T.keyword_with (equal x)) >> T.val_of); | |
| 159 | ||
| 16030 | 160 | fun reserved x = | 
| 161 |   group ("Reserved identifier " ^ quote x)
 | |
| 24910 | 162 | (Scan.one (T.ident_with (fn y => x = y)) >> T.val_of); | 
| 16030 | 163 | |
| 9131 | 164 | val semicolon = $$$ ";"; | 
| 5826 | 165 | |
| 15703 | 166 | val minus = sym_ident :-- (fn "-" => Scan.succeed () | _ => Scan.fail) >> #1; | 
| 11792 
311eee3d63b6
parser for underscore (actually a symbolic identifier!);
 wenzelm parents: 
11651diff
changeset | 167 | val underscore = sym_ident :-- (fn "_" => Scan.succeed () | _ => Scan.fail) >> #1; | 
| 15703 | 168 | fun maybe scan = underscore >> K NONE || scan >> SOME; | 
| 11792 
311eee3d63b6
parser for underscore (actually a symbolic identifier!);
 wenzelm parents: 
11651diff
changeset | 169 | |
| 14835 | 170 | val nat = number >> (#1 o Library.read_int o Symbol.explode); | 
| 5826 | 171 | |
| 17070 | 172 | val tag_name = group "tag name" (short_ident || string); | 
| 173 | val tags = Scan.repeat ($$$ "%" |-- !!! tag_name); | |
| 174 | ||
| 7930 | 175 | val opt_unit = Scan.optional ($$$ "(" -- $$$ ")" >> (K ())) ();
 | 
| 14646 | 176 | fun opt_keyword s = Scan.optional ($$$ "(" |-- !!! (($$$ s >> K true) --| $$$ ")")) false;
 | 
| 177 | ||
| 20983 | 178 | val begin = $$$ "begin"; | 
| 179 | val opt_begin = Scan.optional (begin >> K true) false; | |
| 20961 | 180 | |
| 5826 | 181 | |
| 182 | (* enumerations *) | |
| 183 | ||
| 6003 | 184 | fun enum1 sep scan = scan -- Scan.repeat ($$$ sep |-- !!! scan) >> op ::; | 
| 5826 | 185 | fun enum sep scan = enum1 sep scan || Scan.succeed []; | 
| 186 | ||
| 187 | fun list1 scan = enum1 "," scan; | |
| 188 | fun list scan = enum "," scan; | |
| 189 | ||
| 6013 | 190 | fun and_list1 scan = enum1 "and" scan; | 
| 191 | fun and_list scan = enum "and" scan; | |
| 192 | ||
| 5826 | 193 | |
| 5960 | 194 | (* names and text *) | 
| 5826 | 195 | |
| 8146 | 196 | val name = group "name declaration" (short_ident || sym_ident || string || number); | 
| 197 | val xname = group "name reference" (short_ident || long_ident || sym_ident || string || number); | |
| 198 | val text = group "text" (short_ident || long_ident || sym_ident || string || number || verbatim); | |
| 21858 
05f57309170c
avoid conflict with Alice keywords: renamed pack -> implode, unpack -> explode, any -> many, avoided assert;
 wenzelm parents: 
21609diff
changeset | 199 | val path = group "file name/path specification" name >> Path.explode; | 
| 6553 | 200 | |
| 18898 | 201 | val parname = Scan.optional ($$$ "(" |-- name --| $$$ ")") "";
 | 
| 202 | ||
| 6553 | 203 | |
| 6372 | 204 | (* sorts and arities *) | 
| 5826 | 205 | |
| 8897 | 206 | val sort = group "sort" xname; | 
| 5826 | 207 | |
| 22331 | 208 | val arity = xname -- ($$$ "::" |-- !!! | 
| 209 |   (Scan.optional ($$$ "(" |-- !!! (list1 sort --| $$$ ")")) [] -- sort)) >> triple2;
 | |
| 5826 | 210 | |
| 211 | ||
| 212 | (* types *) | |
| 213 | ||
| 8146 | 214 | val typ = group "type" | 
| 215 | (short_ident || long_ident || sym_ident || type_ident || type_var || string || number); | |
| 5826 | 216 | |
| 217 | val type_args = | |
| 218 | type_ident >> single || | |
| 219 |   $$$ "(" |-- !!! (list1 type_ident --| $$$ ")") ||
 | |
| 220 | Scan.succeed []; | |
| 221 | ||
| 222 | ||
| 223 | (* mixfix annotations *) | |
| 224 | ||
| 18669 | 225 | val mfix = string -- | 
| 226 | !!! (Scan.optional ($$$ "[" |-- !!! (list nat --| $$$ "]")) [] -- | |
| 227 | Scan.optional nat Syntax.max_pri) >> (Mixfix o triple2); | |
| 228 | ||
| 229 | val infx = $$$ "infix" |-- !!! (nat >> Infix || string -- nat >> InfixName); | |
| 230 | val infxl = $$$ "infixl" |-- !!! (nat >> Infixl || string -- nat >> InfixlName); | |
| 231 | val infxr = $$$ "infixr" |-- !!! (nat >> Infixr || string -- nat >> InfixrName); | |
| 5826 | 232 | |
| 18669 | 233 | val binder = $$$ "binder" |-- | 
| 234 | !!! (string -- ($$$ "[" |-- nat --| $$$ "]" -- nat || nat >> (fn n => (n, n)))) | |
| 235 | >> (Binder o triple2); | |
| 236 | ||
| 237 | fun annotation guard fix = $$$ "(" |-- guard (fix --| $$$ ")");
 | |
| 238 | fun opt_annotation guard fix = Scan.optional (annotation guard fix) NoSyn; | |
| 239 | ||
| 240 | val mixfix = annotation !!! (mfix || binder || infxl || infxr || infx); | |
| 21609 | 241 | val mixfix' = annotation I (mfix || binder || infxl || infxr || infx); | 
| 18669 | 242 | val opt_infix = opt_annotation !!! (infxl || infxr || infx); | 
| 21609 | 243 | val opt_infix' = opt_annotation I (infxl || infxr || infx); | 
| 18669 | 244 | val opt_mixfix = opt_annotation !!! (mfix || binder || infxl || infxr || infx); | 
| 245 | val opt_mixfix' = opt_annotation I (mfix || binder || infxl || infxr || infx); | |
| 5826 | 246 | |
| 247 | ||
| 18669 | 248 | (* fixes *) | 
| 5826 | 249 | |
| 21400 | 250 | val where_ = $$$ "where"; | 
| 251 | ||
| 18618 | 252 | val const = name -- ($$$ "::" |-- !!! typ) -- opt_mixfix >> triple1; | 
| 18669 | 253 | |
| 254 | val params = Scan.repeat1 name -- Scan.option ($$$ "::" |-- !!! typ) | |
| 255 | >> (fn (xs, T) => map (rpair T) xs); | |
| 256 | ||
| 19482 
9f11af8f7ef9
tuned basic list operators (flat, maps, map_filter);
 wenzelm parents: 
19284diff
changeset | 257 | val simple_fixes = and_list1 params >> flat; | 
| 18669 | 258 | |
| 259 | val fixes = | |
| 260 | and_list1 (name -- Scan.option ($$$ "::" |-- typ) -- mixfix >> (single o triple1) || | |
| 19482 
9f11af8f7ef9
tuned basic list operators (flat, maps, map_filter);
 wenzelm parents: 
19284diff
changeset | 261 | params >> map Syntax.no_syn) >> flat; | 
| 5826 | 262 | |
| 19845 | 263 | val for_fixes = Scan.optional ($$$ "for" |-- !!! fixes) []; | 
| 21371 | 264 | val for_simple_fixes = Scan.optional ($$$ "for" |-- !!! simple_fixes) []; | 
| 19845 | 265 | |
| 5826 | 266 | |
| 267 | (* terms *) | |
| 268 | ||
| 7477 | 269 | val trm = short_ident || long_ident || sym_ident || term_var || number || string; | 
| 5826 | 270 | |
| 271 | val term = group "term" trm; | |
| 272 | val prop = group "proposition" trm; | |
| 273 | ||
| 274 | ||
| 6949 | 275 | (* patterns *) | 
| 6935 | 276 | |
| 6949 | 277 | val is_terms = Scan.repeat1 ($$$ "is" |-- term); | 
| 6935 | 278 | val is_props = Scan.repeat1 ($$$ "is" |-- prop); | 
| 279 | ||
| 19585 | 280 | val propp = prop -- Scan.optional ($$$ "(" |-- !!! (is_props --| $$$ ")")) [];
 | 
| 6949 | 281 | val termp = term -- Scan.optional ($$$ "(" |-- !!! (is_terms --| $$$ ")")) [];
 | 
| 6935 | 282 | |
| 283 | ||
| 5826 | 284 | (* arguments *) | 
| 285 | ||
| 9131 | 286 | fun keyword_symid is_symid = Scan.one (T.keyword_with is_symid) >> T.val_of; | 
| 287 | val keyword_sid = keyword_symid T.is_sid; | |
| 5826 | 288 | |
| 6983 | 289 | fun atom_arg is_symid blk = | 
| 5826 | 290 | group "argument" | 
| 7477 | 291 | (position (short_ident || long_ident || sym_ident || term_var || | 
| 15703 | 292 | type_ident || type_var || number) >> Args.mk_ident || | 
| 293 | position (keyword_symid is_symid) >> Args.mk_keyword || | |
| 294 | position (string || verbatim) >> Args.mk_string || | |
| 18037 | 295 | position alt_string >> Args.mk_alt_string || | 
| 15703 | 296 | position (if blk then $$$ "," else Scan.fail) >> Args.mk_keyword); | 
| 5826 | 297 | |
| 5877 | 298 | fun paren_args l r scan = position ($$$ l) -- !!! (scan true -- position ($$$ r)) | 
| 15703 | 299 | >> (fn (x, (ys, z)) => Args.mk_keyword x :: ys @ [Args.mk_keyword z]); | 
| 5826 | 300 | |
| 6983 | 301 | fun args is_symid blk x = Scan.optional (args1 is_symid blk) [] x | 
| 302 | and args1 is_symid blk x = | |
| 5826 | 303 | ((Scan.repeat1 | 
| 6983 | 304 | (Scan.repeat1 (atom_arg is_symid blk) || | 
| 305 |       paren_args "(" ")" (args is_symid) ||
 | |
| 19482 
9f11af8f7ef9
tuned basic list operators (flat, maps, map_filter);
 wenzelm parents: 
19284diff
changeset | 306 | paren_args "[" "]" (args is_symid))) >> flat) x; | 
| 5826 | 307 | |
| 9131 | 308 | val arguments = args T.is_sid false; | 
| 309 | ||
| 5826 | 310 | |
| 22119 | 311 | (* targets *) | 
| 19811 | 312 | |
| 22119 | 313 | val target = ($$$ "(" -- $$$ "in") |-- !!! (xname --| $$$ ")");
 | 
| 314 | val opt_target = Scan.option target; | |
| 12272 | 315 | |
| 316 | end; |