| author | berghofe | 
| Wed, 11 Jul 2007 11:39:59 +0200 | |
| changeset 23762 | 24eef53a9ad3 | 
| parent 22331 | 7df6bc8cf0b0 | 
| child 23789 | 1993b865c5ac | 
| permissions | -rw-r--r-- | 
| 5826 | 1 | (* Title: Pure/Isar/outer_parse.ML | 
| 2 | ID: $Id$ | |
| 3 | Author: Markus Wenzel, TU Muenchen | |
| 4 | ||
| 5 | Generic parsers for Isabelle/Isar outer syntax. | |
| 6 | *) | |
| 7 | ||
| 8 | signature OUTER_PARSE = | |
| 9 | sig | |
| 10 | type token | |
| 11 | val group: string -> (token list -> 'a) -> token list -> 'a | |
| 12 | val !!! : (token list -> 'a) -> token list -> 'a | |
| 8581 
5c7ed2af8bfb
!!!! = cut "Corrupted outer syntax in presentation";
 wenzelm parents: 
8350diff
changeset | 13 | val !!!! : (token list -> 'a) -> token list -> 'a | 
| 12047 | 14 |   val triple1: ('a * 'b) * 'c -> 'a * 'b * 'c
 | 
| 15 |   val triple2: 'a * ('b * 'c) -> 'a * 'b * 'c
 | |
| 16 |   val triple_swap: ('a * 'b) * 'c -> ('a * 'c) * 'b
 | |
| 15703 | 17 | val not_eof: token list -> token * token list | 
| 5826 | 18 |   val position: (token list -> 'a * 'b) -> token list -> ('a * Position.T) * 'b
 | 
| 7026 | 19 | val command: token list -> string * token list | 
| 5826 | 20 | val keyword: token list -> string * token list | 
| 21 | val short_ident: token list -> string * token list | |
| 22 | val long_ident: token list -> string * token list | |
| 23 | val sym_ident: token list -> string * token list | |
| 15963 | 24 | val minus: token list -> string * token list | 
| 5826 | 25 | val term_var: token list -> string * token list | 
| 26 | val type_ident: token list -> string * token list | |
| 27 | val type_var: token list -> string * token list | |
| 28 | val number: token list -> string * token list | |
| 29 | val string: token list -> string * token list | |
| 22119 | 30 | val alt_string: token list -> string * token list | 
| 5826 | 31 | val verbatim: token list -> string * token list | 
| 6860 | 32 | val sync: token list -> string * token list | 
| 5826 | 33 | val eof: token list -> string * token list | 
| 15703 | 34 | val $$$ : string -> token list -> string * token list | 
| 16030 | 35 | val reserved : string -> token list -> string * token list | 
| 15703 | 36 | val semicolon: token list -> string * token list | 
| 37 | val underscore: token list -> string * token list | |
| 38 | val maybe: (token list -> 'a * token list) -> token list -> 'a option * token list | |
| 17070 | 39 | val tag_name: token list -> string * token list | 
| 40 | val tags: token list -> string list * token list | |
| 7930 | 41 | val opt_unit: token list -> unit * token list | 
| 14646 | 42 | val opt_keyword: string -> token list -> bool * token list | 
| 20983 | 43 | val begin: token list -> string * token list | 
| 20961 | 44 | val opt_begin: token list -> bool * token list | 
| 5826 | 45 | val nat: token list -> int * token list | 
| 46 | val enum: string -> (token list -> 'a * token list) -> token list -> 'a list * token list | |
| 47 | val enum1: string -> (token list -> 'a * token list) -> token list -> 'a list * token list | |
| 48 | val list: (token list -> 'a * token list) -> token list -> 'a list * token list | |
| 49 | val list1: (token list -> 'a * token list) -> token list -> 'a list * token list | |
| 6013 | 50 | val and_list: (token list -> 'a * token list) -> token list -> 'a list * token list | 
| 51 | val and_list1: (token list -> 'a * token list) -> token list -> 'a list * token list | |
| 5826 | 52 | val name: token list -> bstring * token list | 
| 53 | val xname: token list -> xstring * token list | |
| 54 | val text: token list -> string * token list | |
| 14949 | 55 | val path: token list -> Path.T * token list | 
| 18898 | 56 | val parname: token list -> string * token list | 
| 8897 | 57 | val sort: token list -> string * token list | 
| 22331 | 58 | val arity: token list -> (string * string list * string) * token list | 
| 5826 | 59 | val type_args: token list -> string list * token list | 
| 60 | val typ: token list -> string * token list | |
| 18669 | 61 | val mixfix: token list -> mixfix * token list | 
| 21609 | 62 | val mixfix': token list -> mixfix * token list | 
| 18669 | 63 | val opt_infix: token list -> mixfix * token list | 
| 21609 | 64 | val opt_infix': token list -> mixfix * token list | 
| 18669 | 65 | val opt_mixfix: token list -> mixfix * token list | 
| 66 | val opt_mixfix': token list -> mixfix * token list | |
| 21400 | 67 | val where_: token list -> string * token list | 
| 18669 | 68 | val const: token list -> (string * string * mixfix) * token list | 
| 22119 | 69 | val params: token list -> (string * string option) list * token list | 
| 19845 | 70 | val simple_fixes: token list -> (string * string option) list * token list | 
| 18669 | 71 | val fixes: token list -> (string * string option * mixfix) list * token list | 
| 19845 | 72 | val for_fixes: token list -> (string * string option * mixfix) list * token list | 
| 21371 | 73 | val for_simple_fixes: token list -> (string * string option) list * token list | 
| 5826 | 74 | val term: token list -> string * token list | 
| 75 | val prop: token list -> string * token list | |
| 19585 | 76 | val propp: token list -> (string * string list) * token list | 
| 6949 | 77 | val termp: token list -> (string * string list) * token list | 
| 22119 | 78 | val keyword_sid: token list -> string * token list | 
| 79 | val args: (string -> bool) -> bool -> token list -> Args.T list * token list | |
| 80 | val args1: (string -> bool) -> bool -> token list -> Args.T list * token list | |
| 9131 | 81 | val arguments: token list -> Args.T list * token list | 
| 22119 | 82 | val target: token list -> xstring * token list | 
| 83 | val opt_target: token list -> xstring option * token list | |
| 5826 | 84 | end; | 
| 85 | ||
| 86 | structure OuterParse: OUTER_PARSE = | |
| 87 | struct | |
| 88 | ||
| 9131 | 89 | structure T = OuterLex; | 
| 90 | type token = T.token; | |
| 5826 | 91 | |
| 92 | ||
| 93 | (** error handling **) | |
| 94 | ||
| 95 | (* group atomic parsers (no cuts!) *) | |
| 96 | ||
| 97 | fun fail_with s = Scan.fail_with | |
| 98 | (fn [] => s ^ " expected (past end-of-file!)" | |
| 9155 | 99 | | (tok :: _) => s ^ " expected,\nbut " ^ T.name_of tok ^ T.pos_of tok ^ " was found"); | 
| 5826 | 100 | |
| 101 | fun group s scan = scan || fail_with s; | |
| 102 | ||
| 103 | ||
| 5877 | 104 | (* cut *) | 
| 5826 | 105 | |
| 8581 
5c7ed2af8bfb
!!!! = cut "Corrupted outer syntax in presentation";
 wenzelm parents: 
8350diff
changeset | 106 | fun cut kind scan = | 
| 5826 | 107 | let | 
| 108 | fun get_pos [] = " (past end-of-file!)" | |
| 9131 | 109 | | get_pos (tok :: _) = T.pos_of tok; | 
| 5826 | 110 | |
| 15531 | 111 | fun err (toks, NONE) = kind ^ get_pos toks | 
| 112 | | err (toks, SOME msg) = kind ^ get_pos toks ^ ": " ^ msg; | |
| 5826 | 113 | in Scan.!! err scan end; | 
| 114 | ||
| 8586 | 115 | fun !!! scan = cut "Outer syntax error" scan; | 
| 116 | fun !!!! scan = cut "Corrupted outer syntax in presentation" scan; | |
| 8581 
5c7ed2af8bfb
!!!! = cut "Corrupted outer syntax in presentation";
 wenzelm parents: 
8350diff
changeset | 117 | |
| 5826 | 118 | |
| 119 | ||
| 120 | (** basic parsers **) | |
| 121 | ||
| 122 | (* utils *) | |
| 123 | ||
| 124 | fun triple1 ((x, y), z) = (x, y, z); | |
| 125 | fun triple2 (x, (y, z)) = (x, y, z); | |
| 6430 | 126 | fun triple_swap ((x, y), z) = ((x, z), y); | 
| 5826 | 127 | |
| 128 | ||
| 129 | (* tokens *) | |
| 130 | ||
| 15703 | 131 | val not_eof = Scan.one T.not_eof; | 
| 132 | ||
| 133 | fun position scan = (Scan.ahead not_eof >> T.position_of) -- scan >> Library.swap; | |
| 5826 | 134 | |
| 135 | fun kind k = | |
| 9131 | 136 | group (T.str_of_kind k) (Scan.one (T.is_kind k) >> T.val_of); | 
| 5826 | 137 | |
| 9131 | 138 | val command = kind T.Command; | 
| 139 | val keyword = kind T.Keyword; | |
| 140 | val short_ident = kind T.Ident; | |
| 141 | val long_ident = kind T.LongIdent; | |
| 142 | val sym_ident = kind T.SymIdent; | |
| 143 | val term_var = kind T.Var; | |
| 144 | val type_ident = kind T.TypeIdent; | |
| 145 | val type_var = kind T.TypeVar; | |
| 146 | val number = kind T.Nat; | |
| 147 | val string = kind T.String; | |
| 17165 | 148 | val alt_string = kind T.AltString; | 
| 9131 | 149 | val verbatim = kind T.Verbatim; | 
| 150 | val sync = kind T.Sync; | |
| 151 | val eof = kind T.EOF; | |
| 5826 | 152 | |
| 153 | fun $$$ x = | |
| 9131 | 154 | group (T.str_of_kind T.Keyword ^ " " ^ quote x) | 
| 155 | (Scan.one (T.keyword_with (equal x)) >> T.val_of); | |
| 156 | ||
| 16030 | 157 | fun reserved x = | 
| 158 |   group ("Reserved identifier " ^ quote x)
 | |
| 159 | (Scan.one (T.ident_with (equal x)) >> T.val_of); | |
| 160 | ||
| 9131 | 161 | val semicolon = $$$ ";"; | 
| 5826 | 162 | |
| 15703 | 163 | val minus = sym_ident :-- (fn "-" => Scan.succeed () | _ => Scan.fail) >> #1; | 
| 11792 
311eee3d63b6
parser for underscore (actually a symbolic identifier!);
 wenzelm parents: 
11651diff
changeset | 164 | val underscore = sym_ident :-- (fn "_" => Scan.succeed () | _ => Scan.fail) >> #1; | 
| 15703 | 165 | fun maybe scan = underscore >> K NONE || scan >> SOME; | 
| 11792 
311eee3d63b6
parser for underscore (actually a symbolic identifier!);
 wenzelm parents: 
11651diff
changeset | 166 | |
| 14835 | 167 | val nat = number >> (#1 o Library.read_int o Symbol.explode); | 
| 5826 | 168 | |
| 17070 | 169 | val tag_name = group "tag name" (short_ident || string); | 
| 170 | val tags = Scan.repeat ($$$ "%" |-- !!! tag_name); | |
| 171 | ||
| 7930 | 172 | val opt_unit = Scan.optional ($$$ "(" -- $$$ ")" >> (K ())) ();
 | 
| 14646 | 173 | fun opt_keyword s = Scan.optional ($$$ "(" |-- !!! (($$$ s >> K true) --| $$$ ")")) false;
 | 
| 174 | ||
| 20983 | 175 | val begin = $$$ "begin"; | 
| 176 | val opt_begin = Scan.optional (begin >> K true) false; | |
| 20961 | 177 | |
| 5826 | 178 | |
| 179 | (* enumerations *) | |
| 180 | ||
| 6003 | 181 | fun enum1 sep scan = scan -- Scan.repeat ($$$ sep |-- !!! scan) >> op ::; | 
| 5826 | 182 | fun enum sep scan = enum1 sep scan || Scan.succeed []; | 
| 183 | ||
| 184 | fun list1 scan = enum1 "," scan; | |
| 185 | fun list scan = enum "," scan; | |
| 186 | ||
| 6013 | 187 | fun and_list1 scan = enum1 "and" scan; | 
| 188 | fun and_list scan = enum "and" scan; | |
| 189 | ||
| 5826 | 190 | |
| 5960 | 191 | (* names and text *) | 
| 5826 | 192 | |
| 8146 | 193 | val name = group "name declaration" (short_ident || sym_ident || string || number); | 
| 194 | val xname = group "name reference" (short_ident || long_ident || sym_ident || string || number); | |
| 195 | val text = group "text" (short_ident || long_ident || sym_ident || string || number || verbatim); | |
| 21858 
05f57309170c
avoid conflict with Alice keywords: renamed pack -> implode, unpack -> explode, any -> many, avoided assert;
 wenzelm parents: 
21609diff
changeset | 196 | val path = group "file name/path specification" name >> Path.explode; | 
| 6553 | 197 | |
| 18898 | 198 | val parname = Scan.optional ($$$ "(" |-- name --| $$$ ")") "";
 | 
| 199 | ||
| 6553 | 200 | |
| 6372 | 201 | (* sorts and arities *) | 
| 5826 | 202 | |
| 8897 | 203 | val sort = group "sort" xname; | 
| 5826 | 204 | |
| 22331 | 205 | val arity = xname -- ($$$ "::" |-- !!! | 
| 206 |   (Scan.optional ($$$ "(" |-- !!! (list1 sort --| $$$ ")")) [] -- sort)) >> triple2;
 | |
| 5826 | 207 | |
| 208 | ||
| 209 | (* types *) | |
| 210 | ||
| 8146 | 211 | val typ = group "type" | 
| 212 | (short_ident || long_ident || sym_ident || type_ident || type_var || string || number); | |
| 5826 | 213 | |
| 214 | val type_args = | |
| 215 | type_ident >> single || | |
| 216 |   $$$ "(" |-- !!! (list1 type_ident --| $$$ ")") ||
 | |
| 217 | Scan.succeed []; | |
| 218 | ||
| 219 | ||
| 220 | (* mixfix annotations *) | |
| 221 | ||
| 18669 | 222 | val mfix = string -- | 
| 223 | !!! (Scan.optional ($$$ "[" |-- !!! (list nat --| $$$ "]")) [] -- | |
| 224 | Scan.optional nat Syntax.max_pri) >> (Mixfix o triple2); | |
| 225 | ||
| 226 | val infx = $$$ "infix" |-- !!! (nat >> Infix || string -- nat >> InfixName); | |
| 227 | val infxl = $$$ "infixl" |-- !!! (nat >> Infixl || string -- nat >> InfixlName); | |
| 228 | val infxr = $$$ "infixr" |-- !!! (nat >> Infixr || string -- nat >> InfixrName); | |
| 5826 | 229 | |
| 18669 | 230 | val binder = $$$ "binder" |-- | 
| 231 | !!! (string -- ($$$ "[" |-- nat --| $$$ "]" -- nat || nat >> (fn n => (n, n)))) | |
| 232 | >> (Binder o triple2); | |
| 233 | ||
| 234 | fun annotation guard fix = $$$ "(" |-- guard (fix --| $$$ ")");
 | |
| 235 | fun opt_annotation guard fix = Scan.optional (annotation guard fix) NoSyn; | |
| 236 | ||
| 237 | val mixfix = annotation !!! (mfix || binder || infxl || infxr || infx); | |
| 21609 | 238 | val mixfix' = annotation I (mfix || binder || infxl || infxr || infx); | 
| 18669 | 239 | val opt_infix = opt_annotation !!! (infxl || infxr || infx); | 
| 21609 | 240 | val opt_infix' = opt_annotation I (infxl || infxr || infx); | 
| 18669 | 241 | val opt_mixfix = opt_annotation !!! (mfix || binder || infxl || infxr || infx); | 
| 242 | val opt_mixfix' = opt_annotation I (mfix || binder || infxl || infxr || infx); | |
| 5826 | 243 | |
| 244 | ||
| 18669 | 245 | (* fixes *) | 
| 5826 | 246 | |
| 21400 | 247 | val where_ = $$$ "where"; | 
| 248 | ||
| 18618 | 249 | val const = name -- ($$$ "::" |-- !!! typ) -- opt_mixfix >> triple1; | 
| 18669 | 250 | |
| 251 | val params = Scan.repeat1 name -- Scan.option ($$$ "::" |-- !!! typ) | |
| 252 | >> (fn (xs, T) => map (rpair T) xs); | |
| 253 | ||
| 19482 
9f11af8f7ef9
tuned basic list operators (flat, maps, map_filter);
 wenzelm parents: 
19284diff
changeset | 254 | val simple_fixes = and_list1 params >> flat; | 
| 18669 | 255 | |
| 256 | val fixes = | |
| 257 | and_list1 (name -- Scan.option ($$$ "::" |-- typ) -- mixfix >> (single o triple1) || | |
| 19482 
9f11af8f7ef9
tuned basic list operators (flat, maps, map_filter);
 wenzelm parents: 
19284diff
changeset | 258 | params >> map Syntax.no_syn) >> flat; | 
| 5826 | 259 | |
| 19845 | 260 | val for_fixes = Scan.optional ($$$ "for" |-- !!! fixes) []; | 
| 21371 | 261 | val for_simple_fixes = Scan.optional ($$$ "for" |-- !!! simple_fixes) []; | 
| 19845 | 262 | |
| 5826 | 263 | |
| 264 | (* terms *) | |
| 265 | ||
| 7477 | 266 | val trm = short_ident || long_ident || sym_ident || term_var || number || string; | 
| 5826 | 267 | |
| 268 | val term = group "term" trm; | |
| 269 | val prop = group "proposition" trm; | |
| 270 | ||
| 271 | ||
| 6949 | 272 | (* patterns *) | 
| 6935 | 273 | |
| 6949 | 274 | val is_terms = Scan.repeat1 ($$$ "is" |-- term); | 
| 6935 | 275 | val is_props = Scan.repeat1 ($$$ "is" |-- prop); | 
| 276 | ||
| 19585 | 277 | val propp = prop -- Scan.optional ($$$ "(" |-- !!! (is_props --| $$$ ")")) [];
 | 
| 6949 | 278 | val termp = term -- Scan.optional ($$$ "(" |-- !!! (is_terms --| $$$ ")")) [];
 | 
| 6935 | 279 | |
| 280 | ||
| 5826 | 281 | (* arguments *) | 
| 282 | ||
| 9131 | 283 | fun keyword_symid is_symid = Scan.one (T.keyword_with is_symid) >> T.val_of; | 
| 284 | val keyword_sid = keyword_symid T.is_sid; | |
| 5826 | 285 | |
| 6983 | 286 | fun atom_arg is_symid blk = | 
| 5826 | 287 | group "argument" | 
| 7477 | 288 | (position (short_ident || long_ident || sym_ident || term_var || | 
| 15703 | 289 | type_ident || type_var || number) >> Args.mk_ident || | 
| 290 | position (keyword_symid is_symid) >> Args.mk_keyword || | |
| 291 | position (string || verbatim) >> Args.mk_string || | |
| 18037 | 292 | position alt_string >> Args.mk_alt_string || | 
| 15703 | 293 | position (if blk then $$$ "," else Scan.fail) >> Args.mk_keyword); | 
| 5826 | 294 | |
| 5877 | 295 | fun paren_args l r scan = position ($$$ l) -- !!! (scan true -- position ($$$ r)) | 
| 15703 | 296 | >> (fn (x, (ys, z)) => Args.mk_keyword x :: ys @ [Args.mk_keyword z]); | 
| 5826 | 297 | |
| 6983 | 298 | fun args is_symid blk x = Scan.optional (args1 is_symid blk) [] x | 
| 299 | and args1 is_symid blk x = | |
| 5826 | 300 | ((Scan.repeat1 | 
| 6983 | 301 | (Scan.repeat1 (atom_arg is_symid blk) || | 
| 302 |       paren_args "(" ")" (args is_symid) ||
 | |
| 19482 
9f11af8f7ef9
tuned basic list operators (flat, maps, map_filter);
 wenzelm parents: 
19284diff
changeset | 303 | paren_args "[" "]" (args is_symid))) >> flat) x; | 
| 5826 | 304 | |
| 9131 | 305 | val arguments = args T.is_sid false; | 
| 306 | ||
| 5826 | 307 | |
| 22119 | 308 | (* targets *) | 
| 19811 | 309 | |
| 22119 | 310 | val target = ($$$ "(" -- $$$ "in") |-- !!! (xname --| $$$ ")");
 | 
| 311 | val opt_target = Scan.option target; | |
| 12272 | 312 | |
| 313 | end; |